// $Id: nj.cpp 9948 2011-10-23 15:53:03Z cohenofi $ // version 1.00 // last modified 3 Nov 2002 #include "nj.h" #include "errorMsg.h" #include "logFile.h" #include "treeUtil.h" #include #include #include using namespace std; //------------------------------------------ // general outline: // we follow Swofford's book, "Molecular Systematics" pg489. // currentNodes is the vector of the nodes that are "in process". // in the beggining, these are all the leaves. Once, 2 leaves are separeted, // they are excluded from currentNodes, and their father is added to currentNodes. // we (almost) finish the algorithm when currentNodes's size is 3. (i.e., we know the topology). // thus when we start from an evolutionary tree, all we do, is to construct a star (start) tree //------------------------------------------ //------------------------------------------ // constructor and start //------------------------------------------ tree NJalg::computeTree(VVdouble distances,const vector& names, const tree * const constriantTree /*= NULL*/){ assert(distances.size() == names.size()); tree resTree = startingTree(names); if (distances.size()<3) return resTree; vector currentNodes; resTree.getAllLeaves(currentNodes,resTree.getRoot()); if (constriantTree) { njConstraint njc(resTree, *constriantTree); while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances, njc); } else { while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances); } resTree.create_names_to_internal_nodes(); resTree.makeSureAllBranchesArePositive(); LOGDO(5,resTree.output(myLog::LogFile())); return resTree; } tree NJalg::startingTree(const vector& names) { return starTree(names); } tree NJalg::startingTree(const tree& inTree) { tree et; et.createRootNode(); vector allLeaves; inTree.getAllLeaves(allLeaves,inTree.getRoot()); vector names(allLeaves.size()); for (int k = 0 ; k < allLeaves.size(); ++k) names[k]=allLeaves[k]->name(); return startingTree(names); } void NJalg::updateBranchDistance(const VVdouble& distanceTable, const Vdouble& rValues, tree::nodeP nodeNew, tree::nodeP nodeI, tree::nodeP nodeJ, int Iplace, int Jplace) { MDOUBLE dis= (IplacesetDisToFather(DisI_new); nodeJ->setDisToFather(DisJ_new); } void NJalg::NJiterate(tree& et, vector& currentNodes, VVdouble& distanceTable) { Vdouble rVector = calc_r_values(currentNodes,distanceTable);//CHECK2 if (currentNodes.size() == 3) { update3taxaLevel(distanceTable,rVector,currentNodes); currentNodes.clear(); return; } int minRaw,minCol; calc_M_matrix(currentNodes,distanceTable,rVector,minRaw,minCol);//CHECK3 tree::nodeP nodeI = currentNodes[minRaw]; tree::nodeP nodeJ = currentNodes[minCol]; tree::nodeP theNewNode; theNewNode= SeparateNodes(et,nodeI,nodeJ); //CHECK4 updateBranchDistance(distanceTable,rVector,theNewNode,nodeI,nodeJ,minRaw,minCol); //CHECK6 et.create_names_to_internal_nodes(); UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol); } void NJalg::NJiterate(tree& et, vector& currentNodes, VVdouble& distanceTable, njConstraint& njc) { Vdouble rMatrix = calc_r_values(currentNodes,distanceTable);//CHECK2 if (currentNodes.size() == 3) { update3taxaLevel(distanceTable,rMatrix,currentNodes); currentNodes.clear(); return; } int minRaw,minCol; calc_M_matrix(currentNodes,distanceTable,rMatrix,minRaw,minCol, njc);//CHECK3 tree::nodeP nodeI = currentNodes[minRaw]; tree::nodeP nodeJ = currentNodes[minCol]; tree::nodeP theNewNode; theNewNode= SeparateNodes(et,nodeI,nodeJ); njc.join(nodeI, nodeJ, theNewNode); //CHECK4 updateBranchDistance(distanceTable,rMatrix,theNewNode,nodeI,nodeJ,minRaw,minCol); //CHECK6 et.create_names_to_internal_nodes(); UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol); LOGDO(15,et.output(myLog::LogFile(),tree::ANCESTORID)); } Vdouble NJalg::calc_r_values(vector& currentNodes, const VVdouble& distanceTable) { Vdouble r_values(currentNodes.size(),0.0); for (int i=0; i & currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol){ MDOUBLE min = VERYBIG; for (int i=0; i < currentNodes.size();++i){ for (int j =i+1; j < currentNodes.size();++j) { MDOUBLE dis= (i& currentNodes, const VVdouble& distanceTable, const Vdouble & r_values, int& minRaw,int& minCol, const njConstraint& njc){ MDOUBLE min = VERYBIG; MDOUBLE min_noc = VERYBIG; int minRaw_noc=-1,minCol_noc=-1; for (int i=0; i < currentNodes.size();++i){ for (int j =i+1; j < currentNodes.size();++j) { if (njc.isCompatible(currentNodes[i],currentNodes[j])) { MDOUBLE dis= (iname()<<","<name() <<"-> " << currentNodes[minRaw] ->name()<<","<name()<< " ("<father() != node2->father()) errorMsg::reportError(" error in function NJalg::SeparateNodes - nodes don't have the same father"); tree::nodeP fatherNode = node1->father(); tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum()); node1->setFather(theNewNode); theNewNode->setSon(node1); node2->setFather(theNewNode); theNewNode->setSon(node2); // remove from son list of father node. fatherNode->removeSon(node1); fatherNode->removeSon(node2); return theNewNode; } void NJalg::update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values, vector& currentNodes) { // update the distance of the 3 taxa that are left in the end, to the root. MDOUBLE dis0root = distanceTable[0][1]/2+0.5*(r_values[0]-r_values[1]); MDOUBLE dis1root = distanceTable[0][1]/2+0.5*(r_values[1]-r_values[0]); MDOUBLE dis2root = distanceTable[0][2]/2+0.5*(r_values[2]-r_values[0]); if (dis0rootsetDisToFather(dis0root); currentNodes[1]->setDisToFather(dis1root); currentNodes[2]->setDisToFather(dis2root); } void NJalg::UpdateDistanceTableAndCurrentNodes(vector& currentNodes, VVdouble& distanceTable, tree::nodeP nodeI, tree::nodeP nodeJ, tree::nodeP theNewNode, int Iplace, int Jplace) { // Iplace is the place of i in the "old" currentNodes vector int i,j; // updating currentNodes vector newCurrentNode= currentNodes; vector::iterator vec_iter1=remove( newCurrentNode.begin(),newCurrentNode.end(),nodeI ); newCurrentNode.erase(vec_iter1,newCurrentNode.end()); vector::iterator vec_iter2=remove( newCurrentNode.begin(),newCurrentNode.end(),nodeJ ); newCurrentNode.erase(vec_iter2,newCurrentNode.end()); newCurrentNode.push_back(theNewNode); map nodeIntMap1; for (int z=0; z::value_type(currentNodes[z],z)); } VVdouble newDisTable; newDisTable.resize(newCurrentNode.size()); for (int z1=0;z1 * weights) { VVresize(_startingDistanceTable,distanceTable.size(),distanceTable.size());// for printing stuff later. VVresize(LTable,distanceTable.size(),distanceTable.size());// for printing stuff later. int i,j; _nodeNames.resize(currentNodes.size()); for ( i=0; i < currentNodes.size(); i++) { _nodeNames[i] =(currentNodes[i]->name()); for ( j=i+1; j < currentNodes.size(); j++) { MDOUBLE tempDis = -2000.0; MDOUBLE resLikelihood; int seqnodeI_ID = sd.getId(currentNodes[i]->name()); int seqnodeJ_ID = sd.getId(currentNodes[j]->name()); const sequence& snodeI = *sd.getSeqPtr(seqnodeI_ID,true); const sequence& snodeJ = *sd.getSeqPtr(seqnodeJ_ID,true); tempDis = _cd->giveDistance(snodeI,snodeJ,weights,&resLikelihood); distanceTable[i][j] = tempDis; LTable[i][j] = resLikelihood; } } if (myLog::LogLevel()>4) { for (i=0; i < currentNodes.size(); i++) { for (j=i+1; j < currentNodes.size(); j++) { LOG(100,<<"nj distance ["<4) { // for (i=0; i < currentNodes.size(); i++) { // for (j=i+1; j < currentNodes.size(); j++) { // LOG(4,<<"nj likelihood for distance["<name()<<" = "<SetName(htuname); //CHECK5 //_myET->getRoot()->SetName("RootOfStar"); //CHECK6 // et.output(cout,et.getRoot(),tree::ANCESTOR); */