// $Id: semphyStep.cpp 7805 2010-03-26 13:59:46Z privmane $ #include "semphyStep.h" #include "computePijComponent.h" #include "approxSemphyDistance.h" #include "minSpanTree.h" #include "rearangeTree.h" #include "correctToCanonialForm.h" #include "likelihoodComputation.h" #include "searchStatus.h" #include "talRandom.h" #include "computeUpAlg.h" #include "computeDownAlg.h" #include "constraints.h" #include "treeIt.h" #include "someUtil.h" #include //#include "bblEM.h" //#include "computeExactAlg.h" //#include "computeMarginalAlg.h" //#include "exactSemphyDistance.h" //#include "jointTable.h" semphyStep::semphyStep(tree& et,const tree* ctPtr, const sequenceContainer& sc, const stochasticProcess& sp, const computePijGam& pij0, const suffStatGlobalGam& cup, const suffStatGlobalGam& cdown, const bool useApproxCounts, const VdoubleRep& cprobAtEachPos, const VVdoubleRep& posteriorRateProbAtEachPos, const suffStatGlobalGam& computeMarginal, const Vdouble *weights, const MDOUBLE toll) : _et(et),_ctPtr(ctPtr),_sc(sc),_sp(sp),_pij0(pij0),_cup(cup),_cdown(cdown), _useApproxCounts(useApproxCounts),_cprobAtEachPos(cprobAtEachPos), _posteriorRateProbAtEachPos(posteriorRateProbAtEachPos), _computeMarginal(computeMarginal),_weights(weights),_toll(toll){ computeSemphyStep(); } // this function fixes the penalty table, so there is HUGE penalty // for connecting any member of the clade to a non-member. // We note, that the members of a "constrain clade" are all the leaves // in this clade, but also all the internal nodes of the clade, // including the root of the clade. // The idea is that by doing so, all the members of the clade are sure // to be clustered together, and all the members outside are also // sure to be clustered together. // Anyway, the HUGE penalty must be taken in the spanning tree // at least once: when the clade is connected to the rest of the "world". // Because we put a "HUGE" penalty, this penalty will only be taken once. 
// This will also work in the general case of several (nested or not) clades. // A condition that must be verified in order for the function to work: // say we want sequence A to cluster with sequence B. // What is the internal node connecting these two sequences? // In order for the algorithm to work, we must demand that the tree // according to which the penalty matrix was computed is compatible with the // constraint tree. // Thus, we take the internal nodes that are relevant - from the tree upon which // the constraints were computed. void semphyStep::addConstraintPenalty(VVdouble & penaltyTable) { // Here we set the constraints. // One of the functions of this class is to create a penalty // table (VVdouble) that corresponds to the constraints. // For example, if the constraint is that seq A and B are together, // Say that node N is their parent in the tree. // If S = {A,B,N} // In the penalty table that is created, there will be a "1" // between any member of S and a non-member of S. // In all other cases, there will be 0. // If there are multiple constraints - it will be a superposition // of all the constraints. For example if // S1 = {A,B,C,N1,N2} // S2 = {A,B,N1} // In the penalty table between A and B it will be 0. // between A and C, it will be 1. // Between A and a node that is not in S1 or in S2 - it will be 2. constraints cons(*_ctPtr); cons.setTree(_et); // here we check the condition described above is fulfilled. 
if (!cons.fitsConstraints()){ // sanaty check LOGDO(1,_et.output(myLog::LogFile(),tree::PHYLIP,true)); LOGDO(1,_ctPtr->output(myLog::LogFile(),tree::PHYLIP,true)); LOGDO(1,cons.outputMissingClads(cerr)); errorMsg::reportError(" Tree does not fit constraints in SEMPHY step"); } VVdouble consPeneTable(cons.getPeneltyTable()); // here we compute the sum of penalty double sumOfPenalties=0.0; for (int i=0;i isRealTaxa(_et.getNodesNum(),0); treeIterTopDownConst tIt(_et); for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) { if (mynode->isLeaf()) {isRealTaxa[mynode->id()] = 1;} } // fillCpijUpDownExactMarginalProbAtEachPos(); semphyDistance* semDis1 = computeQmatrix(_weights);// NULL = WEIGHTS... VVdouble penetlyTable = *(semDis1->getLikeDistanceTablePtr()); // copy MDOUBLE score = 0.0; if (_ctPtr!=NULL) { addConstraintPenalty(penetlyTable); } rearrangeTree::pairSet inSet = minSpanTree::span_tree(penetlyTable,&score); maybePrintPenaltyTableAndStartingTree(penetlyTable); //(log output lvl 50) MDOUBLE qSTART = computeQstart(semDis1); maybePrintDistanceBetweenNodesTable(semDis1);//(log, 50) maybePrintSpanTreeListAndQAfterSpanTree(inSet,score);//(log, 50) rearrangeTree rearrangeTree1(&inSet,semDis1); rearrangeTree1.reconstructTree(_et); maybePrintTheTreeAfterRearrangeTree();//(log, 50) correctToCanonialForm ctcf(&_et,*semDis1->getDistanceTablePtr(),isRealTaxa); ctcf.correctTree(); maybePrintTheTreeAfterCorrectToCanonialForm();//(log, 50) maybePrintQspanMinusQinit(score,qSTART);//(log, 50) if (semDis1!=NULL) delete semDis1; } /* void semphyStep::fillCpijUpDownExactMarginalProbAtEachPos() { computeUpAlg::fillComputeUp(&_et,_pi,_computeUp1); _computeProbOfEachPos1->fillProbOfEachPosition(&_et,_pi,_computeUp1); computeDownAlg::fillDown(&_et,_pi,_computeUp1,_computeDown1); computeExactAlg::fillExact(&_et,_pi,_computeUp1,_computeDown1,_computeExact1); computeMarginalAlg::fillMarginal(&_et,_pi,_computeExact1,_computeProbOfEachPos1,_computeMarginal1); }*/ 
void semphyStep::maybePrintPenaltyTableAndStartingTree(const VVdouble& penetlyTable) const{ LOG(50,<< "start of semphy step"< allNodes; _et.getAllNodes(allNodes,_et.getRoot()); for (int n1=0; n1 < allNodes.size(); ++n1) { if (allNodes[n1]->father()!=NULL) qSTART += inSemDist->getLikeDistance(allNodes[n1]->id(),allNodes[n1]->father()->id()); } LOG(50,<<" q start = "<getDistanceTablePtr(); // copy for (int i=0; i < tTable.size(); ++i) { for (int k=0; k < tTable[i].size(); ++k) { LOG(50,<< tTable[i][k]<<"\t"); } LOG(50,<first)<<" is connected to "<< z->second<computeDistances(); return semDis1; }