// $Id: likelihoodComputation.cpp 9899 2011-10-11 19:56:48Z rubi $ #include "definitions.h" #include "tree.h" #include "computeUpAlg.h" #include "likelihoodComputation.h" #include "gammaUtilities.h" #include #include using namespace likelihoodComputation; /******************************************************************************************** likelihood computation - full data (1) *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const Vdouble * const weights, unObservableData *unObservableData_p) { computePijGam pi; pi.fillPij(et,sp); MDOUBLE logLforMissingData; MDOUBLE LforMissingData; if(unObservableData_p){ logLforMissingData = unObservableData_p->getlogLforMissingData(); LforMissingData = exp(logLforMissingData); } MDOUBLE res =0; doubleRep LofPos; int k; for (k=0; k < sc.seqLen(); ++k) { LofPos = likelihoodComputation::getLofPos(k,//pos, et, //const tree& sc, // sequenceContainer& sc, pi, //const computePijGam& , sp, NULL); if(unObservableData_p){ // conditioning on observability for all rateCat. LofPos = LofPos / (1- LforMissingData); } res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& ); } //if(unObservableData_p){ // conditioning on observability for allPos & allRateCat // res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData())); //} return res; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, unObservableData *unObservableData_p) { // with the pi already computed. doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); // ver1 - fix likelihoodForEachCat by LforMissingDataPerCat - Wrong version... //if(pLforMissingDataPerCat){ // tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]); //} tmp += tmpPerCat[i]*sp.ratesProb(i); } // ver2 - fix likelihoodForEachCat by LforMissingDataAll if(unObservableData_p){ // conditioning on observability for all rateCat. tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData())); } return tmp; } /******************************************************************************************** likelihood computation - per pos, per cat (1.1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const computePijHom& pi, const stochasticProcess& sp, unObservableData *unObservableData_p) { computeUpAlg cup; suffStatGlobalHomPos ssc; cup.fillComputeUp(et,sc,pos,pi,ssc); doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let); if (!DBIG_EQUAL(convert(tmpLcat), 0.0)) { cerr<<"tmpLcat = "<=0.0); tmp+=tmpLcat; } // cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP if (!DBIG_EQUAL(convert(tmp), 0.0)){ LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<getlogLforMissingData())); } return tmp; } //r4s_proportional /******************************************************************************************** likelihood computation - full data (1) *********************************************************************************************/ Vdouble likelihoodComputation::getTreeLikelihoodProportionalAllPosAlphTheSame(const tree& et, const vector& sc, multipleStochasticProcess* msp, const gammaDistribution* pProportionDist, const Vdouble * const weights) { Vdouble geneLikelihoodVec; //geneRateLikelihoodVec[geneN][globalRateCateg] will hold the LL of the gene given the global rate VVdouble geneRateLikelihoodVec; geneLikelihoodVec.resize(sc.size(),0.0); geneRateLikelihoodVec.resize(sc.size()); for(int geneN = 0;geneN < sc.size();++geneN){ geneRateLikelihoodVec[geneN].resize(pProportionDist->categories(),0.0); for(int globalRateCateg = 0;globalRateCateg < pProportionDist->categories();++globalRateCateg){ msp->getSp(geneN)->setGlobalRate(pProportionDist->rates(globalRateCateg)); computePijGam pi; pi.fillPij(et,*msp->getSp(geneN)); doubleRep LofPos; for (int k=0; k < sc[geneN].seqLen(); ++k) { //LofPos is sum LofPos_LocalRateCat_i*p(LocalRateCat_i) LofPos = likelihoodComputation::getLofPosProportional(k,//pos, et, //const tree& sc[geneN], // sequenceContainer& sc, pi, //const computePijGam& , *msp->getSp(geneN)); //removed the prior of the globar rate categ cause it is multiplied below geneRateLikelihoodVec[geneN][globalRateCateg] += log(LofPos)*(weights?(*weights)[k]:1); } } //Once we are finished iterating over all globalRateCategs we need to sum the log likelihood for this gene //which is: log(prior(globalRateCateg_i)*exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+prior(globalRateCateg_j)*exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..) //assuming a flat prior this equals: log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]..) //which can be written as:log(prior(globalRateCateg))+log(exp(geneRateLikelihoodVec[geneN][globalRateCateg_i]))(1+exp(geneRateLikelihoodVec[geneN][globalRateCateg_j]-geneRateLikelihoodVec[geneN][globalRateCateg_i]..) geneLikelihoodVec[geneN] = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec[geneN]);//Strictly assumes a flat prior distribution } return geneLikelihoodVec; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ //Old - remove when QA is done doubleRep likelihoodComputation::getLofPosProportional(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, const MDOUBLE globalRateProb) { // with the pi already computed. doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); tmp += tmpPerCat[i]*sp.ratesProb(i)*globalRateProb; //old - now globalRateProb is multipled outside } return tmp; } /******************************************************************************************** likelihood computation - per pos (1.1) *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosProportional(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp) { // with the pi already computed. doubleRep tmp=0; int numOfCat = sp.categories(); VdoubleRep tmpPerCat; tmpPerCat.resize(numOfCat); for (int i=0; i < sp.categories();++i) { tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp); tmp += tmpPerCat[i]*sp.ratesProb(i); } return tmp; } //r4s_proportional /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledHom(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHomPos& ssc){ // using the pij of stochastic process rather than pre computed pij's... if (ssc.size()==0) {errorMsg::reportError("error in function likelihoodComputation::getLofPosWhenUpIsFilled");} doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let); tmp+=tmpLcat; } return tmp; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosHomModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp){ // using the pij of stochastic process rather than pre computed pij's... if (sp.categories()!=1) { errorMsg::reportError("num of categories in function getLofPosHomModel must be one"); } computeUpAlg cup; suffStatGlobalHomPos ssc; computePijHom cpij; cpij.fillPij(et,sp); cup.fillComputeUp(et,sc,pos,cpij,ssc); return getProbOfPosWhenUpIsFilledHom(pos,et,sc,sp,ssc); } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosGamModelEachSiteDifferentRate(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp){ computePijGam pi; pi.fillPij(et,sp); return getLofPos(pos,et,sc,pi,sp); } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPos(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const MDOUBLE gRate){ // when there is a global rate for this position // using the pij of stochastic process rather than pre computed pij's... computeUpAlg cup; suffStatGlobalHomPos ssc; cup.fillComputeUpSpecificGlobalRate(et,sc,pos,sp,ssc,gRate); doubleRep tmp = 0.0; for (int let = 0; let < sp.alphabetSize(); ++let) { doubleRep tmpLcat= ssc.get(et.getRoot()->id(),let)* sp.freq(let);; assert(tmpLcat>=0.0); tmp+=tmpLcat; } return tmp; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getLofPosAndPosteriorOfRates(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp, VdoubleRep& postrior){ // with the pi already computed. doubleRep tmp=0; for (int i=0; i < sp.categories();++i) { postrior[i]=getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i); tmp += postrior[i]; } for (int i=0; i < sp.categories();++i) postrior[i] /= tmp; return tmp; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, const Vdouble * weights) { MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } like += log(tmp) * (weights?(*weights)[pos]:1); } return like; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights, unObservableData* unObservableData_p) { posLike.clear(); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } assert(tmp>0.0); if(unObservableData_p){ tmp = tmp/(1- exp(unObservableData_p->getlogLforMissingData())); } like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } /******************************************************************************************** *********************************************************************************************/ //old MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights) { posLike.clear(); MDOUBLE like = 0.0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp(0.0); for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*pProportionDist->ratesProb(globalRateCategor)*sp.ratesProb(localRateCategor); } } assert(tmp>0.0); like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } //new MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et, const sequenceContainer& sc, stochasticProcess& sp, const suffStatGlobalGamProportional& cup, const gammaDistribution* pProportionDist, VVdoubleRep& posLike, const Vdouble * weights) { for(int pos = 0;pos < sc.seqLen();++pos){ posLike[pos].resize(pProportionDist->categories(),0.0); } Vdouble geneRateLikelihoodVec; geneRateLikelihoodVec.resize(pProportionDist->categories(),0.0); MDOUBLE like = 0.0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { VdoubleRep tmpVec; //hold the LofPos for each global rate category tmpVec.resize(pProportionDist->categories(),0.0);//This would sum for every global rate category for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ doubleRep tmp1(0.0); doubleRep tmp2(0.0); for (int localRateCategor = 0; localRateCategor < sp.categories(); ++localRateCategor) { doubleRep veryTmp(0.0); for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(pos,globalRateCategor,localRateCategor,et.getRoot()->id(),let) * sp.freq(let); } tmp1 += veryTmp; tmp2 += veryTmp*sp.ratesProb(localRateCategor); } tmpVec[globalRateCategor] += tmp2; posLike[pos][globalRateCategor] = tmp1; } for(int globalRateCategor = 0;globalRateCategor < pProportionDist->categories();++globalRateCategor){ assert(tmpVec[globalRateCategor]>0.0); geneRateLikelihoodVec[globalRateCategor] += log(tmpVec[globalRateCategor])*(weights?(*weights)[pos]:1); } } like = log(pProportionDist->ratesProb(0))+exponentResolver(geneRateLikelihoodVec); return like; } /******************************************************************************************** fill the posteriorLike matrix with each position posterior rate (p(r|D)) but without the weights. *********************************************************************************************/ MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, VVdoubleRep& posteriorLike, const Vdouble * weights) { suffStatGlobalGam cup; computeUpAlg cupAlg; computePijGam cpGam; cpGam.fillPij(et,sp); cupAlg.fillComputeUp(et,sc,cpGam,cup); return getPosteriorOfRates(et,sc,sp,cup,posteriorLike,weights); } // fill the posteriorLike matrix with each position posterior rate (p(r|D)) // but without the weights. MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, const Vdouble * weights) { posteriorLike.clear(); posteriorLike.resize(sc.seqLen()); for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories()); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep posProb=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } posProb += veryTmp*sp.ratesProb(categor); posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); } like += log(posProb) * (weights?(*weights)[pos]:1); for (int categor1 = 0; categor1 < sp.categories(); ++categor1) { posteriorLike[pos][categor1] /= posProb; } } return like; } // fill the posteriorLike matrix with each position posterior rate (p(r|D)) // and the LLPP, but without the weights. MDOUBLE likelihoodComputation::getPosteriorOfRatesAndLLPP(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGam& cup, VVdoubleRep& posteriorLike, VdoubleRep& LLPerPos, const Vdouble * weights) { posteriorLike.clear(); posteriorLike.resize(sc.seqLen()); for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories()); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { LLPerPos[pos] = 0.0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let); } LLPerPos[pos] += veryTmp*sp.ratesProb(categor); posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); } like += log(LLPerPos[pos]) * (weights?(*weights)[pos]:1); for (int categor1 = 0; categor1 < sp.categories(); ++categor1) { posteriorLike[pos][categor1] /= LLPerPos[pos]; } } return like; } // this function forces non gamma computation of likelihoods from up. // i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma. MDOUBLE likelihoodComputation::getTreeLikelihoodFromUpSpecifcRates(const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalHom& cup, VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights. const Vdouble * weights) { posLike.clear(); MDOUBLE like = 0; //computing the likelihood from up: for (int pos = 0; pos < sc.seqLen(); ++pos) { doubleRep tmp=0; for (int let =0; let < sc.getAlphabet()->size(); ++let) { tmp += cup.get(pos, et.getRoot()->id(), let) * sp.freq(let); } assert(tmp > 0); like += log(tmp) * (weights?(*weights)[pos]:1); posLike.push_back(tmp); } return like; } /******************************************************************************************** *********************************************************************************************/ doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledGam(const int pos, const tree& et, const sequenceContainer& sc, const stochasticProcess& sp, const suffStatGlobalGamPos& cup) { doubleRep tmp=0; for (int categor = 0; categor < sp.categories(); ++categor) { doubleRep veryTmp =0; for (int let =0; let < sc.alphabetSize(); ++let) { veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let); } tmp += veryTmp*sp.ratesProb(categor); } assert(tmp>0.0); return tmp; } /******************************************************************************************** *********************************************************************************************/ MDOUBLE likelihoodComputation::computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et, const stochasticProcess &sp, Vdouble &LLPerPos) { MDOUBLE treeLogLikelihood = 0.0; computePijGam cpij; cpij.fillPij(et, sp); LLPerPos.resize(sc.seqLen()); doubleRep LofPos; for (int pos=0; pos < sc.seqLen() ;++pos) { LofPos = likelihoodComputation::getLofPos(pos, et, sc, cpij, sp); MDOUBLE tmpLL = log(LofPos); treeLogLikelihood += tmpLL; LLPerPos[pos] = tmpLL; } return treeLogLikelihood; } /******************************************************************************************** likelihood for each category - used for unObservableData *********************************************************************************************/ Vdouble likelihoodComputation::getLofPosPerCat(const int pos, const tree& et, const sequenceContainer& sc, const computePijGam& pi, const stochasticProcess& sp) { // with the pi already computed. int numOfCat = sp.categories(); Vdouble tmp; tmp.resize(numOfCat); for (int i=0; i < numOfCat;++i) { tmp[i] = convert(getLofPos(pos,et,sc,pi[i],sp))*sp.ratesProb(i); } return tmp; } //doubleRep likelihoodComputation::getLofPos(const int pos, // const tree& et, // const sequenceContainer& sc, // const computePijGam& pi, // const stochasticProcess& sp){ //// with the pi already computed. // doubleRep tmp=0; // for (int i=0; i < sp.categories();++i) { // tmp += getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i); // } // return tmp; //} // MDOUBLE likelihoodComputation::getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha, // const Vdouble originalBounderi, // const VVdouble& posteriorLike, // const VdoubleRep& LLPP, // const Vdouble* weights) // { // int nCategories = originalBounderi.size()-1; // Vdouble rateWeights; rateWeights.resize(nCategories); // for (int i=0; i