// Overview (added in review; describes only what is visible in this file):
//   Member definitions for the distance-based tree builders
//   distanceBasedSeqs2Tree, iterativeDistanceSeqs2Tree,
//   commonAlphaDistanceSeqs2Tree, rate4siteDistanceSeqs2Tree and
//   posteriorDistanceSeqs2Tree.
//
//   - distanceBasedSeqs2Tree::seqs2Tree stores the constraint tree pointer and
//     the weights, fills a distance table and name list with
//     giveDistanceTable(_distM, ...) and builds the tree with
//     _dist2et->computeTree(distTable, vNames, _constraintTreePtr).
//   - The iterative step (its log messages name seqs2TreeOneIterationInternal):
//       1. builds the distance table - with a temporary uniDistribution
//          stochasticProcess and likeDist when no side info is set (alpha is
//          then initialised to 1.5), otherwise after utilizeSideInfo();
//       2. builds _newTree with _dist2et->computeTree();
//       3. stores optimizeSideInfo(sc, _newTree) in _newTreeLogLikelihood.
//   - optimizeSideInfo: the commonAlpha and posterior variants pick an optimizer
//     by dynamic_cast on the replacement model (tamura92 branch, gtr branch,
//     otherwise bestAlphaAndBBL) and keep getBestAlpha() in _newAlpha; the
//     posterior variant then returns getPosteriorOfRates(), which fills
//     _newPosterior; the rate4site variant runs bblEM and then
//     computeML_siteSpecificRate(), which fills _newRates.
//   - acceptSideInfo copies the _new* values into _alpha / _rates / _posterior;
//     utilizeSideInfo hands them to _spPtr->distr() or _distM.
//   - The public seqs2Tree of the three side-info variants forwards to
//     seqs2TreeIterative.
//
// NOTE(review): this text looks damaged and presumably does not compile as is -
//   TODO confirm against version control before building or editing:
//   * line breaks are collapsed, so each // comment swallows the code after it;
//   * one #include has no header name;
//   * static_cast( and dynamic_cast( have no type argument, and vector vNames
//     has no element type;
//   * several LOG(...) statements run straight into unrelated code, which
//     suggests that everything between a less-than sign and the following
//     greater-than sign was dropped (function endings, constructors, loop
//     bodies);
//   * the last definition (posteriorDistanceSeqs2Tree::printSideInfo) is cut
//     off.
// $Id: distanceBasedSeqs2Tree.cpp 6002 2009-03-20 19:39:03Z privmane $ #include "distanceBasedSeqs2Tree.h" #include "uniDistribution.h" #include "distanceTable.h" #include "bestAlpha.h" #include "siteSpecificRate.h" #include "someUtil.h" #include "bblEM.h" #include "tamura92.h" #include "bestTamura92param.h" #include "bestGtrModelParams.h" #include #include "replacementModelSSRV.h" #include "trivialAccelerator.h" // ********************************************************************** // *** The basic non-iterative versions ********************************* // ********************************************************************** tree distanceBasedSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { _constraintTreePtr=constraintTreePtr; _weights = weights; // Calculate distance table tree et; VVdouble distTable; vector vNames; giveDistanceTable(_distM,sc,distTable,vNames,_weights); // Build tree from the distance table et = _dist2et->computeTree(distTable, vNames, _constraintTreePtr); LOG(6,<<"# distanceBasedSeqs2Tree::seqs2Tree: The reconsructed tree:"<categories() >1) _alpha = (static_cast(_spPtr->distr()))->getAlpha(); else _alpha=-99.9; // this should never be used } // *** Iterative tree building ****************************************** tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven) { LOGDO(3,printTime(myLog::LogFile())); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal:"< _treeLogLikelihood + _epsilonLikelihoodImprovement); LOGDO(3,printTime(myLog::LogFile())); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"< vNames; LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Started giveDistanceTable. 
"); LOGDO(7,printTime(myLog::LogFile())); if (!sideInfoSet) { // Then use homogeneous rates // Create homogeneous likeDist _alpha = 1.5; // Since no ASRV side info is known yet, we set an initial alpha for bestAlphaAndBBL optimizations uniDistribution distribution; stochasticProcess* uniDistSp = NULL; replacementModelSSRV* rmSSRV = dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel()); if (!rmSSRV) { uniDistSp = new stochasticProcess(&distribution, _spPtr->getPijAccelerator()); } else { trivialAccelerator pijAcc(rmSSRV->getBaseRM()); uniDistSp = new stochasticProcess(&distribution, &pijAcc); } likeDist homogeneousDist(*uniDistSp,static_cast(_distM)->getToll()); giveDistanceTable(&homogeneousDist,sc,distTable,vNames,_weights); delete uniDistSp; } else { // use the side information utilizeSideInfo(); giveDistanceTable(_distM,sc,distTable,vNames,_weights); } LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished giveDistanceTable, started distances2Tree::computeTree. "); LOGDO(7,printTime(myLog::LogFile())); // 2. Build tree from the distance table _newTree = _dist2et->computeTree(distTable, vNames, _constraintTreePtr); LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::computeTree, started optimizeSideInfo. "); LOGDO(7,printTime(myLog::LogFile())); // 3. Optimize branch lengths and side info for the tree topology _newTreeLogLikelihood=optimizeSideInfo(sc, _newTree); LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::optimizeSideInfo. 
"); LOGDO(7,printTime(myLog::LogFile())); if (!sideInfoSet) { LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"< vNames; utilizeSideInfo(); giveDistanceTable(_distM,sc,distTable,vNames,_weights); // Build tree from the distance table localScopeEt = _dist2et->computeTree(distTable,vNames, _constraintTreePtr); LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrapInternal:"<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc,weights,constraintTreePtr); } MDOUBLE commonAlphaDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the tamura92 model bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4BBL/*0.01*/, 5.0, _maxIterationsBBL, _alpha, 5.0 ); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the gtr model bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4alphaOptimiz, true, true); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else { bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0, _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz, _maxIterationsBBL); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } } MDOUBLE 
commonAlphaDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; (static_cast(_spPtr->distr()))->setAlpha(alpha); return likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et, sc, *_spPtr, _weights); } void commonAlphaDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; } void commonAlphaDistanceSeqs2Tree::utilizeSideInfo() { // set new alpha value in the sp that is used in _distM (static_cast(_spPtr->distr()))->setAlpha(_alpha); LOG(10,<<"# utilizing alpha"<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc,weights,constraintTreePtr); } MDOUBLE rate4siteDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { bblEM optimizer(et, sc, *_spPtr, _weights, _maxIterationsBBL, _epsilonLikelihoodImprovement4BBL); // Note: this verstion of ML rates computation can only use a uniDistribution stochasticProcess Vdouble likelihoods; MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement); //computeEB_EXP_siteSpecificRate return(treeLogLikelihood); } MDOUBLE rate4siteDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; Vdouble likelihoods; MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement); //computeEB_EXP_siteSpecificRate return(treeLogLikelihood); } void rate4siteDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; _rates = _newRates; } void rate4siteDistanceSeqs2Tree::utilizeSideInfo() { (static_cast(_distM))->setRates(_rates); LOG(10,<<"# 
utilizing rates"<(_spPtr->distr()))->setAlpha(_alpha); } void rate4siteDistanceSeqs2Tree::printSideInfo(ostream& out) const { if (_rates.size()) out<<"ML rates: "<<_rates<(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr); } // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) { return seqs2TreeIterative(sc, weights, constraintTreePtr); } MDOUBLE posteriorDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et) { if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the tamura92 model bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/, _epsilonLikelihoodImprovement4BBL/*0.01*/, 5.0, _maxIterationsBBL, _alpha, 5.0 ); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else if (dynamic_cast(_spPtr->getPijAccelerator()->getReplacementModel())) { // Optimizing params of the gtr model bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement, _epsilonLikelihoodImprovement4alphaOptimiz, true, true); _newAlpha=optimizer.getBestAlpha(); return(optimizer.getBestL()); } else { bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0, _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz, _maxIterationsBBL); _newAlpha=optimizer.getBestAlpha(); // cached only to make alpha optimization faster } // Compute posterior probabilities of rates per site return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior); } MDOUBLE posteriorDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer 
&sc, const tree &et, MDOUBLE alpha) { _newAlpha = alpha; (static_cast(_spPtr->distr()))->setAlpha(alpha); // Compute posterior probabilities of rates per site return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior); } void posteriorDistanceSeqs2Tree::acceptSideInfo() { _alpha = _newAlpha; _posterior = _newPosterior; } void posteriorDistanceSeqs2Tree::utilizeSideInfo() { (static_cast(_distM))->setPosterior(_posterior); LOG(10,<<"# utilizing posterior"<(_spPtr->distr()))->setAlpha(_alpha); } void posteriorDistanceSeqs2Tree::printSideInfo(ostream& out) const { if (_posterior.size()) out<<_posterior<