// $Id: simulateTree.cpp 8508 2010-08-12 15:21:04Z rubi $ #include "definitions.h" #include "treeUtil.h" #include "simulateTree.h" #include "talRandom.h" #include "gammaDistribution.h" #include "codon.h" simulateTree::simulateTree(const tree& _inEt, const stochasticProcess& sp, const alphabet* alph) : _et(_inEt), _sp(sp),_alph(alph),_avgSubtitutionsPerSite(0.0) {}; simulateTree::~simulateTree() {} void simulateTree::generate_seq(int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); vector rateVec(seqLength); for (int h = 0; h < seqLength; h++) { int theRanCat = getRandCategory(h); rateVec[h] = _sp.rates(theRanCat); } _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void simulateTree::generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha, Vdouble rates) { rates.clear(); rates.resize(seqLength); for (int h = 0; h < seqLength; h++) { rates[h] = talRandom::SampleGamma(alpha); } } void simulateTree::generate_seq_continuous_gamma(int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); vector rateVec(seqLength); MDOUBLE alpha= (static_cast(_sp.distr()))->getAlpha(); for (int h = 0; h < seqLength; h++) { rateVec[h] = talRandom::SampleGamma(alpha); } _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec, seqLength, _et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void simulateTree::generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength) { if (_alph->size() != 4) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon is applicable only for nucleotide process"); if (seqLength %3 != 0) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: seqLenth should be a multiplicative of 3"); if (simRates.size() != seqLength) errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: the size of simRates should be identical to seqLenth"); // sequence justAseq(_alph); // vector simulatedSequences(_et.getNodesNum(),justAseq); vector simulatedSequences; //generate three nucleotide positions at a time. Repeat each position if the generated sequences contain stop codon Vdouble rateVec(3); bool bStopCodonFound = false; codon codonAlph; for (int p = 0; p < seqLength; p+=3) { rateVec[0] = simRates[p]; rateVec[1] = simRates[p+1]; rateVec[2] = simRates[p+2]; //generate 3 nucleotide positions with no stop codon for (int loop = 0; loop < 1000; ++loop) { bStopCodonFound = false; generate_seqWithRateVector(rateVec, 3); for (int s = 0; s < _simulatedSequences.size(); ++s) { string codonStr = _simulatedSequences[s].toString(); if (codonAlph.isStopCodon(codonStr)) { bStopCodonFound = true; break; } } if (!bStopCodonFound) break; } if (bStopCodonFound) errorMsg::reportError("Could not generate a position without stop codon"); //append positions to the positions generated so far if (p == 0) simulatedSequences = _simulatedSequences; //this will copy also the names of the sequences else { for (int i = 0; i < simulatedSequences.size(); ++i) simulatedSequences[i] += _simulatedSequences[i]; } } _simulatedSequences = simulatedSequences; } void simulateTree::generate_seqWithRateVector(const Vdouble& rateVec, const int seqLength) { sequence justAseq(_alph); _simulatedSequences.resize(_et.getNodesNum(),justAseq); for (int i=0; i < _simulatedSequences.size(); ++i) { _simulatedSequences[i].resize(seqLength); } generateRootSeq(seqLength); _avgSubtitutionsPerSite = 0.0; for (int p=0 ; p < _et.getRoot()->getNumberOfSons() ; ++p) { recursiveGenerateSpecificSeq(rateVec,seqLength,_et.getRoot()->getSon(p)); } _avgSubtitutionsPerSite /= 1.0*seqLength; } void simulateTree::generateRootSeq(int seqLength) { for (int i = 0; i < seqLength; i++) { _simulatedSequences[_et.getRoot()->id()][i] = giveRandomChar(); } _simulatedSequences[_et.getRoot()->id()].setAlphabet(_alph); _simulatedSequences[_et.getRoot()->id()].setName(_et.getRoot()->name()); _simulatedSequences[_et.getRoot()->id()].setID(_et.getRoot()->id()); } void simulateTree::recursiveGenerateSpecificSeq( const vector &rateVec, const int seqLength, tree::nodeP myNode) { for (int y = 0; y < seqLength; y++) { MDOUBLE lenFromFather=myNode->dis2father()*rateVec[y]; int aaInFather = _simulatedSequences[myNode->father()->id()][y]; int newChar = giveRandomChar(aaInFather,lenFromFather,y); if(newChar != aaInFather) _avgSubtitutionsPerSite += 1; _simulatedSequences[myNode->id()][y] = newChar; } _simulatedSequences[myNode->id()].setAlphabet(_alph); _simulatedSequences[myNode->id()].setName(myNode->name()); _simulatedSequences[myNode->id()].setID(myNode->id()); for (int x =0 ; x < myNode->getNumberOfSons(); ++x) { recursiveGenerateSpecificSeq(rateVec, seqLength, myNode->getSon(x)); } } int simulateTree::giveRandomChar() const { for (int loop =0 ;loop<100000 ;loop++) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE sum = 0.0; for (int j=0;j<_sp.alphabetSize();++j) { sum+=_sp.freq(j); if (theRandNum=0); assert(letterInFatherNode<_sp.alphabetSize()); for (int loop =0 ;loop<100000 ;loop++) { MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0); MDOUBLE sum = 0.0; for (int j=0;j<_sp.alphabetSize();++j) { sum+=_sp.Pij_t(letterInFatherNode,j, length); if (theRandNumisInternal()) continue; myseqData.add(_simulatedSequences[i]); } return myseqData; }