// $Id: bootstrap.cpp 962 2006-11-07 15:13:34Z privmane $ #include "definitions.h" #include "someUtil.h" #include "bootstrap.h" #include "splitTreeUtil.h" #include #include using namespace std; // ----------------------------------------------------------------------------------------- // ----------------------------- The constructor and its related functions ----------------- // ----------------------------------------------------------------------------------------- bootstrap::bootstrap(const treeVec& treevect):_numTrees(0), _nTaxa(0){ fillFromTreeVec(treevect); } bootstrap::bootstrap (const string& filename):_numTrees(0), _nTaxa(0){ fillFromTreeVec(getStartingTreeVecFromFile(filename)); } void bootstrap::fillFromTreeVec(const treeVec& treevect) { // for each tree, we compute the set of all splits. // we update for each split in each tree the split-map. // so we have the frequency of each split. for (treeVec::const_iterator i=treevect.begin();i!=treevect.end();++i) splitTree(*i); } // takes a tree, computes all splits and // enter them into the Splits map void bootstrap::splitTree(const tree& T){ _numTrees++; updateNtaxaAndNameMapAndValidateConsistency(T); splitSubTreeRecursivly(T.getRoot(), true); // the true because we call the recursion with the root. Otherwise it is false; } void bootstrap::updateNtaxaAndNameMapAndValidateConsistency(const tree& T) { if (!_nTaxa) { // only for the first tree, this part intializes the _nameMap and the _nTaxa _sequenceNames = getSequencesNames(T); for (_nTaxa=0;_nTaxa<_sequenceNames.size();++_nTaxa) { _nameMap[_sequenceNames[_nTaxa]] =_nTaxa; } } else { vector namesInT1 = getSequencesNames(T); if (namesInT1.size() < _nameMap.size()) { string errMs1 = "Not all trees have the same number of sequences. "; errMs1 += "tree number 1 has: "; errMs1 += int2string(_nameMap.size()); errMs1 += " while tree number: "; errMs1 += int2string(_numTrees); errMs1 += " has "; errMs1 += int2string(namesInT1.size()); errMs1 += "\nError in function bootstrap::splitTree"; errorMsg::reportError(errMs1); } for (int i=0; i < namesInT1.size(); ++i) { if (_nameMap.count(namesInT1[i])==0) { string errMs = "The taxa "; errMs += namesInT1[i]; errMs += " found in tree number "; errMs += int2string(_numTrees); errMs += " is not present in the first tree. Error in function bootstrap::splitTree"; errorMsg::reportError(errMs); } } } } set bootstrap::splitSubTreeRecursivly(const tree::nodeP &n, const bool isRoot) {//false // this function assumes that the root of the tree is not a leaf set s; // the id of all leaves of the subtree of the nodeP n. for(int i=0; igetNumberOfSons() ;++i) { set sonSet(splitSubTreeRecursivly(n->getSon(i))); set::iterator it = sonSet.begin(); for (; it != sonSet.end(); ++it) s.insert(*it); } if(isRoot) return s; if (n->isLeaf()) { s.insert(idFromName(n->name())); } else { // this avoids keeping track of trivial splits. set::const_iterator sBeg(s.begin()); set::const_iterator sEnd(s.end()); split sp(sBeg,sEnd,_nTaxa); _Splits.add(sp); } return(s); } // ----------------------------------------------------------------------------------------- // ----------------------------- getWeightsForTree ----------------------------------------- // ----------------------------------------------------------------------------------------- map bootstrap::getWeightsForTree(const tree& inTree) const { map v; recursivelyBuiltBPMap(inTree.getRoot(), v); return (v); } // the function returns the ids of the leaves in the subtree defined by rootOfSubtree. set bootstrap::recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map &v) const { set s; for(int i=0;igetNumberOfSons();++i) { set sonSet(recursivelyBuiltBPMap(rootOfSubtree->getSon(i),v)); set::iterator it = sonSet.begin(); for (; it != sonSet.end(); ++it) s.insert(*it); } if (rootOfSubtree->isLeaf()) { s.insert(idFromName(rootOfSubtree->name())); } set::const_iterator sBeg(s.begin()); set::const_iterator sEnd(s.end()); split sp(sBeg,sEnd,_nTaxa); v[rootOfSubtree->id()]=(static_cast(_Splits.counts(sp)))/_numTrees; return(s); } // We get different trees, and the id's are not consistent among different trees. // here, we map a name to a single id. int bootstrap::idFromName(const string & name) const { NameMap_t::const_iterator i(_nameMap.find(name)); if (i==_nameMap.end()) { string s="Can not find an Id for the taxa name:"; s+=name; s+="\n error in function bootstrap::idFromName\n"; errorMsg::reportError(s); } return(i->second); } // ----------------------------------------------------------------------------------------- // ----------------------------- Printing the bp ------------------------------------------ // ----------------------------------------------------------------------------------------- void bootstrap::print(ostream& sout){// = cout _Splits.print(sout); } void bootstrap::printTreeWithBPvalues(ostream &out, const tree &t, const map & v, const bool printBranchLenght) const{ recursivlyPrintTreeWithBPvalues(out,t.getRoot(),v, printBranchLenght); out<<";"; } void bootstrap::recursivlyPrintTreeWithBPvalues(ostream &out, const tree::nodeP &myNode, const map &v, const bool printBranchLenght) const { if (myNode->isLeaf()) { out << myNode->name(); if (printBranchLenght) out << ":"<dis2father(); return; } else { out <<"("; for (int i=0;igetNumberOfSons();++i) { if (i>0) out <<","; recursivlyPrintTreeWithBPvalues(out, myNode->getSon(i),v, printBranchLenght); } out <<")"; if (myNode->isRoot()==false) { if (printBranchLenght) out<<":"<dis2father(); map::const_iterator val=v.find(myNode->id()); if ((val!=v.end()) && val->second>0.0) { out << "["<second<<"]"; } } } } // for DEBUGGING ONLY: void bootstrap::print_names(ostream &out) const { NameMap_t::const_iterator i(_nameMap.begin()); for (;i!=_nameMap.end();++i) out << "{"<first<<" = "<second<<"}"< names; for (NameMap_t::const_iterator i(_nameMap.begin());i!=_nameMap.end();++i) names.push_back(i->first); // 2. create a star tree tree res = starTree(names); // 3. get the sorted vector of the splits from which the consensus is to be built. vector > sortedSplits = _Splits.sortSplits(); // 4. get a list of compatible splits MDOUBLE thresholdForNumTrees = threshold * _numTrees; vector consensus; for (int k=0; k < sortedSplits.size(); ++k) { bool compatible = true; if (sortedSplits[k].second < thresholdForNumTrees) break; for (vector::const_iterator j=consensus.begin(); j != consensus.end(); ++j) { if (!(sortedSplits[k].first.compatible(*j))) { compatible=false; break; } } if (compatible) { consensus.push_back(sortedSplits[k].first); } } // 5. Now we build a tree from all the compatible splits for (vector::iterator i1 = consensus.begin();i1!=consensus.end();++i1) { applySplit(res,*i1,_nameMap); } res.create_names_to_internal_nodes(); res.makeSureAllBranchesArePositive(); return (res); }