/* * * MultiBoost - Multi-purpose boosting package * * Copyright (C) 2010 AppStat group * Laboratoire de l'Accelerateur Lineaire * Universite Paris-Sud, 11, CNRS * * This file is part of the MultiBoost library * * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, 5th Floor, Boston, MA 02110-1301 USA * * Contact: Balazs Kegl (balazs.kegl@gmail.com) * Norman Casagrande (nova77@gmail.com) * Robert Busa-Fekete (busarobi@gmail.com) * * For more information and up-to-date version, please visit * * http://www.multiboost.org/ * */ /** * \file Feature.h Defines what a single Feature is. * \author busarobi * \date 14 May 2010 */ //#pragma warning( disable : 4786 ) #ifndef FEATURE_H_ #define FEATURE_H_ #include #include #include #include "IOdef.h" using namespace std; namespace MultiBoost { ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// template class filterediterator : public vector::iterator { public: filterediterator() : vector::iterator(), _pos(0) {} explicit filterediterator( BaseType* __i ) : vector::iterator(__i), _pos(0) {} filterediterator& operator++ () { ++(this->_M_current); return *this; } filterediterator operator++ ( int ) { filterediterator retval(this->_M_current); (this->_M_current)++; return retval; } /* * Here should be implemented the indirect indexing for the filtered data */ void uploadUsedIndexSet( const set* usedIndices ) {} protected: int _pos; }; ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// template class GenericFeature { public: // typedefs // for iterator typedef filterediterator iterator; typedef pair pIterator; // end of typedefs // constructor GenericFeature() : _dataRep(DR_UNKNOWN), _data(0) {} protected: eDataRep _dataRep; // sparse or not vector _data; // store the data column /** * For restricted indices, so if only a subset of the dataset is in use then we should store the indices of the used examples. * However, it seems to be easier to overload the iterator if we would like to use only a part of dataset. */ set* _usedIndices; public: /* * Post-processing after reading the data, for example the sorting should be implemented here! */ virtual void postProcessing( void ) = 0; /////////////////////////////////////////////////////////////////////////////////// //////// for the iterator /////////////////////////////////////////////////////////////////////////////////// /* * Has to be set the indirect indices */ iterator begin() { iterator it = static_cast(this->_data.begin()); //the indirect indices should be implemented here it.uploadUsedIndexSet(this->_usedIndices); return it; } iterator end() { iterator it = static_cast(this->_data.end()); return it; } pIterator getBeginEnd( set* usedIndices = 0 ) { if ( usedIndices ) return make_pair( this->begin(), this->end() ); else { } } /////////////////////////////////////////////////////////////////////////////////// ////////getters and setters /////////////////////////////////////////////////////////////////////////////////// vector& getData() const { return _data; } eDataRep& getDataRep() const { return _dataRep; } set* getUsedInidces() const { return _usedIndices; } void setData(vector _data) { this->_data = _data; } void setDataRep(eDataRep _dataRep) { this->_dataRep = _dataRep; } void setUsedInidces(const set* _usedInidces) { this->_usedInidces = _usedInidces; } }; ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// template class SparseGenericFeature : GenericFeature { public: SparseGenericFeature() : GenericFeature(), _valIdxsMap(0) {} protected: /** * The (row) indexes of the values of the Feature. * If this vector is empty the data is dense! * Example of sparse data: * @data * {1 X, 3 Y, 4 -1} * {1 Y, 2 Y, 4 -1} * (we ignore the label at column 4 for the moment) * 1st Feature object's member variable _data contains {X, Y} and _valIdxsMap {<1,1>, <2,2>} * 2nd Feature object's member variable _data contains {Y} and _valIdxsMap {<2,1>} * So, the key is the order of example and the value is the data * \remark This might be a heavy memory footprint for dense data, since it that case it * is simply not used. For each Example the total memory usage (empty) is 16 bytes. */ map _valIdxsMap; }; ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// template class NumericFeature : public GenericFeature { public: NumericFeature() : GenericFeature() {} protected: virtual void postProcessing( void ) {} }; ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// template class SparseNumericFeature : SparseGenericFeature { public: SparseNumericFeature() : GenericFeature() {} protected: virtual void postProcessing( void ) {} protected: }; ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// template class SortedNumericFeature : public NumericFeature { public: SortedNumericFeature() : NumericFeature(), _sortedData(0) {} protected: virtual void postProcessing( void ) {} protected: vector _sortedData; }; } // end of namespace MultiBoost #endif /* FEATURE_H_ */