#include "computeSubstitutionCounts.h" #include "computePosteriorExpectationOfSubstitutions.h" #include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h" #include "multipleStochasticProcess.h" #include "matrixUtils.h" #include "simulateJumps.h" #include "simulateCodonsJumps.h" #include "simulateJumpsAbstract.h" #include "treeIt.h" #include "treeUtil.h" /******************************************************************************************** computeSubstitutionCounts *********************************************************************************************/ computeSubstitutionCounts::computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum, const MDOUBLE probCutOffSum, bool isSilent): _tr(tr),_sc(sc),_pMSp(MultSpPtr),_outDir(outDir),_LpostPerSpPerCat(LpostPerSpPerCat), _simulationsIterNum(simulationsIterNum), _probCutOffSum(probCutOffSum),_isSilent(isSilent) { if(!_pMSp->getSPVecSize()){ errorMsg::reportError("Trying to call computeSubstitutionCounts with an empty multipleStochasticProcess object at computeSubstitutionCounts::computeSubstitutionCounts"); } _alphabetSize = _pMSp->getSp(0)->alphabetSize(); } computeSubstitutionCounts& computeSubstitutionCounts::operator=(const computeSubstitutionCounts &other){ if (this != &other) { // Check for self-assignment } return *this; } /******************************************************************************************** *********************************************************************************************/ void computeSubstitutionCounts::run() { for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ //if(sonStateIndex == fatherStateIndex) continue; _expMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0); _probMap_father2son[fatherStateIndex][sonStateIndex].resize(_sc.seqLen(),0); } } resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_jointProb_PosNodeXY); resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_probChanges_PosNodeXY); resize_VVVV(_sc.seqLen(),_tr.getNodesNum(),_alphabetSize,_alphabetSize,_expChanges_PosNodeXY); computePosteriorOfChangeGivenTerminalsPerSpPerCat(); // GLM - multiple SPs } /******************************************************************************************** *********************************************************************************************/ void computeSubstitutionCounts::computePosteriorOfChangeGivenTerminalsPerSpPerCat() { int numOfSPs = _pMSp->getSPVecSize(); // per Sp for (int spIndex=0; spIndex < numOfSPs; ++spIndex) { // Per RateCategory -- All the computations are done while looping over rate categories stochasticProcess * currentSp = _pMSp->getSp(spIndex); int numOfRateCategories = currentSp->categories(); for (int rateCategIndex=0 ; rateCategIndex < numOfRateCategories;++rateCategIndex) { tree copy_et = _tr; MDOUBLE rateCategVal = currentSp->rates(rateCategIndex); MDOUBLE minimumRateCategVal = 0.0000001; MDOUBLE rate2multiply = max(rateCategVal,minimumRateCategVal); if(rateCategVal < minimumRateCategVal){ LOGnOUT(4, <<" >>> NOTE: the rate category "<runSimulation(_simulationsIterNum); if(!_isSilent) LOGnOUT(4,<<"finished simulations"<isReversible()) cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions(copy_et,_sc,currentSp); // Per POS,CAT else cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions_nonReversibleSp(copy_et,_sc,currentSp); // Per POS,CAT cpesPerRateCategoryPerPos->computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos); // II) Exp - take in account both: 1) simulations 2) posteriorsGivenTerminal VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,expChangesForBranchPerRateCategoryPerPos); VVdouble expVV = cpesPerRateCategoryPerPos->computeExpectationAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos, expChangesForBranchPerRateCategoryPerPos); // Per POS for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; _expMap_father2son[fatherStateIndex][sonStateIndex][pos] += expVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } // III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,probChangesForBranchPerRateCategoryPerPos); VVdouble probVV = cpesPerRateCategoryPerPos->computePosteriorAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos); for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; _probMap_father2son[fatherStateIndex][sonStateIndex][pos] += probVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } // Store all information PerCat,PerPOS for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState _jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; _probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; _expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos]; } } } delete(cpesPerRateCategoryPerPos); } delete(simPerRateCategory); // Per POS } // per rateCat } // Per Sp } /******************************************************************************************** printProbExp() print perPos (over all branches) use the members _expV01, _expV10 for basic *********************************************************************************************/ void computeSubstitutionCounts::printProbExp() { string posteriorExpectationOfChangeString = _outDir + "//" + "posteriorExpectationOfChange.txt"; ofstream posteriorExpectationStream(posteriorExpectationOfChangeString.c_str()); string posteriorProbabilityOfChangeString = _outDir + "//" + "posteriorProbabilityOfChange.txt"; ofstream posteriorProbabilityStream(posteriorProbabilityOfChangeString.c_str()); int fatherStateIndex,sonStateIndex; posteriorExpectationStream<<"#POS"<<"\t"; posteriorProbabilityStream<<"#POS"<<"\t"; for (fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for (sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; posteriorExpectationStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; posteriorProbabilityStream<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; } } posteriorExpectationStream<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"; } } countProbPerPosStream<id()][fatherStateIndex][sonStateIndex] > _probCutOffSum){//NIM out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<id()][fatherStateIndex][sonStateIndex]; } } } } outCount<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< mynode->name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< pos+1<<"\t"<name()<<"\t"<dis2father()<<"\t"<id()][fatherStateIndex][sonStateIndex]<<"\t"<id()][fatherStateIndex][sonStateIndex]<id()][fatherStateIndex][sonStateIndex]; expFather2Son[fatherStateIndex][sonStateIndex] += expChanges[mynode->id()][fatherStateIndex][sonStateIndex]; if (probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > countCutOff) countFather2Son[fatherStateIndex][sonStateIndex] += 1; } } } for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){ if(sonStateIndex == fatherStateIndex) continue; outSum<fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<< probFather2Son[fatherStateIndex][sonStateIndex]<<"\t"<