// $Id: likeDist.h 9752 2011-08-05 20:27:25Z rubi $ #ifndef ___LIKE_DIST_H #define ___LIKE_DIST_H #include "definitions.h" #include "countTableComponent.h" #include "distanceMethod.h" #include "stochasticProcess.h" #include "logFile.h" #include "jcDistance.h" #include "unObservableData.h" #include using namespace std; class likeDist : public distanceMethod { public: // WARNING: the stochasticProcess is NOT copied. The same object is used explicit likeDist(const stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001, unObservableData* unObservableData_p=NULL) : _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {} likeDist(const likeDist& other) : _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {} virtual likeDist* clone() const {return new likeDist(*this);} // This constructor allows non-const stochasticProcess so that likeDist will be able to change alpha, etc. explicit likeDist(stochasticProcess& sp, const MDOUBLE toll =0.0001, const MDOUBLE maxPairwiseDistance = 5.0, const MDOUBLE minPairwiseDistance = 0.0000001) : _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {} // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. const MDOUBLE giveDistance(const countTableComponentGam& ctc, MDOUBLE& resQ, const MDOUBLE initialGuess= 0.03) const; // initial guess // given two sequences, it evaluates the log likelihood. MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2, const MDOUBLE dis2evaluate); // returns the estimated ML distance between the 2 sequences. // if score is given, it will be the log-likelihood. const MDOUBLE giveDistance(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; // this function creates a countTableComponent (ctc) from the two sequences. // it then computes the distance from this ctc. // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER). // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood. MDOUBLE giveDistanceThroughCTC(const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const; const MDOUBLE giveLikelihood(const sequence& s1, const sequence& s2, MDOUBLE distance, const vector * weights=NULL) const; // return the stochasticProcess const stochasticProcess& getStochasticProcess() const {return _sp;} stochasticProcess& getNonConstStochasticProcess(); bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;} MDOUBLE getToll() const {return _toll;} MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;} protected: const stochasticProcess &_sp; stochasticProcess *_nonConstSpPtr; const MDOUBLE _toll; const MDOUBLE _maxPairwiseDistance; const MDOUBLE _minPairwiseDistance; jcDistance _jcDist; unObservableData* _unObservableData_p; private: const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc, MDOUBLE& resL, const MDOUBLE initialGuess= 0.03) const; // initial guess public: static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp, const sequence& s1, const sequence& s2, const MDOUBLE dist, const vector * weights=NULL); }; ////////////////////////////////////////////////////////////////////////// class C_evalLikeDist{ private: const countTableComponentGam& _ctc; const stochasticProcess& _sp; unObservableData* _unObservableData_p; public: C_evalLikeDist(const countTableComponentGam& ctc, const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL) :_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {}; MDOUBLE operator() (MDOUBLE dist) { const MDOUBLE epsilonPIJ = 1e-10; MDOUBLE sumL=0.0; for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) { MDOUBLE rate = _sp.rates(rateCategor); MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate); if (pijgetlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist LOG(8,<<"check bl="<