// $Id: sequenceContainer.h 11662 2013-07-17 08:01:17Z cohenofi $ #ifndef ___SEQUENCE_CONTAINER #define ___SEQUENCE_CONTAINER #include "definitions.h" #include "sequence.h" #include "gainLossAlphabet.h" class sequenceContainer { public: class taxaIterator; friend class taxaIterator; class constTaxaIterator; friend class constTaxaIterator; //------------------------------------------------------------ //constructors: explicit sequenceContainer(); sequenceContainer(const sequenceContainer& other,const alphabet *inAlph); virtual ~sequenceContainer(); //questions only: const int seqLen() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].seqLen();} const int numberOfSeqs() const {return _seqDataVec.size();} const int alphabetSize() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].getAlphabet()->size();} const vector& getGeneralRemarks() const {return _generalRemarks;} const int makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs = false); //if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces const int getId(const string &seqName, bool issueWarninInNotFound=true) const;//return -1 if not found... sequence& operator[](const int id) {return _seqDataVec[_id2place[id]];} // get the ID of the sequence. Return the sequence itself. const sequence& operator[](const int id) const {return _seqDataVec[_id2place[id]];} const bool operator==(const sequenceContainer& sq) const; const sequence& getSeqDirectFromDataVec(int i){return _seqDataVec[i];} const Vstring names() const; // return a vector of the names of all the sequences. const string& name(const int id) const {return _seqDataVec[_id2place[id]].name();}; const alphabet* getAlphabet() const {return _seqDataVec[0].getAlphabet();} const vector getAlphabetDistribution(bool isCountUnknown=false) const; vector getSeqNamesThatMatchPos(const int pos, const char charId); const vector getAlphabetDistribution(int pos,bool isCountUnknown=false) const; //returns the number of positions that are invariable (all seqs are identical int getInvariablePosNum() const; bool isInvariable(const int pos) const; // computed the number of sequences without gaps at a specific position // for example, if the multiple sequence alignment is // AT- // AG- // A-M // numberOfSequencesWithoutGaps(0) = 3 // numberOfSequencesWithoutGaps(1) = 2 // numberOfSequencesWithoutGaps(2) = 1 int numberOfSequencesWithoutGaps(const int pos) const; int numberOfSequencesWithoutUnknowns(const int pos) const; //make changes: void resize(int t,const alphabet* inAlph) { if (inAlph == NULL) { errorMsg::reportError("cannot resize when the alphabet is unknown"); } sequence s(inAlph); _seqDataVec.resize(t,s); } void add(const sequence& inSeq); void remove(const int idSeq); void removeAll(); void removeIdenticalSequences(); int placeToId(const int place) const {return _seqDataVec[place].id();}; //get place in the vector and return the id of the sequence void addGeneralRemark(const string& inRemark) {_generalRemarks.push_back(inRemark);} void changeGaps2MissingData(); //removePositions: the positions to be removed are marked as '1' in posToRemoveVec //all other positions are '0' void removePositions(const Vint & posToRemoveVec); sequenceContainer getSubSeq(const int startPos, const int endPos); int getNumOfOccurancesPerPos(const int pos, const char charId); void removeGapPositions(); void removeGapPositionsAllSeqs(); void removeGapPositionsAccordingToAReferenceSeq(const string & seqName); void changeDotsToGoodCharacters(); void removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName); void concatenate(sequenceContainer& other); void startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes=1, const int minNumOfZeros=0); public: sequence::Iterator begin(const int id){//iterface to sequence iterator sequence::Iterator temp; temp.begin(_seqDataVec[id]); return temp; } sequence::Iterator end(const int id){//iterface to sequence iterator sequence::Iterator temp; temp.end(_seqDataVec[id]); return temp; } class taxaIterator { public: explicit taxaIterator(){}; ~taxaIterator(){}; void begin(sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.begin(); } void end(sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.end(); } sequence& operator* () {return *_pointer;} sequence const & operator* () const {return *_pointer;} sequence * operator-> () {return &*_pointer;} //MATAN- CHECK!!! sequence const * operator-> () const {return &* _pointer;} // MATAN - CHECK!!! void operator ++() {++_pointer;} void operator --() { --_pointer; } bool operator != (const taxaIterator& rhs){return (_pointer != rhs._pointer);} bool operator == (const taxaIterator& rhs){return (_pointer == rhs._pointer);} private: vector::iterator _pointer; };//end if class taxaIterator class constTaxaIterator { public: explicit constTaxaIterator(){}; ~constTaxaIterator(){}; void begin(const sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.begin(); } void end(const sequenceContainer & inSeqCont){ _pointer = inSeqCont._seqDataVec.end(); } sequence const & operator*() const {return *_pointer;} sequence const * operator->() const {return &*_pointer;}// MATAN - CHECK!!! void operator ++() {++_pointer;} void operator --() { --_pointer; } bool operator != (const constTaxaIterator& rhs) { return (_pointer != rhs._pointer); } bool operator == (const constTaxaIterator& rhs) { return (_pointer == rhs._pointer); } private: vector::const_iterator _pointer; }; public: // interfaces to iterators taxaIterator taxaBegin(const int id=0){// interface to taxaIterator taxaIterator temp; temp.begin(*this); return temp; } taxaIterator taxaEnd(){// interface to taxaIterator taxaIterator temp; temp.end(*this); return temp; } constTaxaIterator constTaxaBegin() const{ //interface to const taxaIter constTaxaIterator temp; temp.begin(*this); return temp; } constTaxaIterator constTaxaEnd() const{ constTaxaIterator temp; temp.end(*this); return temp; } private: vector _seqDataVec; vector _generalRemarks; vector _id2place; }; #endif