// $Id: jcDistance.h 1928 2007-04-04 16:46:12Z privmane $ #ifndef ___JC_DISTANCE #define ___JC_DISTANCE #include "definitions.h" #include "distanceMethod.h" #include #include /********************************************************* Jukes-Cantor distance method. Assumes no constraints on replacement from one state to another. Receives size of alphabet in constructor, and this enables to have one class for JC-distance for nucleotides, a.a., and codons Weights are an input vector for giving additional weight to positions in the sequences. *******************************************************/ class jcDistance : public distanceMethod { public: explicit jcDistance() {} virtual jcDistance* clone() const{ return new jcDistance(*this);} const MDOUBLE giveDistance( const sequence& s1, const sequence& s2, const vector * weights, MDOUBLE* score=NULL) const {//score is not used here if (typeid(s1.getAlphabet()) != typeid(s2.getAlphabet())) errorMsg::reportError("Error in jcDistance::giveDistance, s1 and s2 contain different type of alphabet"); // pS1Base and pS2Base are references to s1 and s2 respectively. // The method uses seq1 and seq2 and not s1 and s2, because when // the sequences contain mulAlphabet we must first convert them to the base alphabet const sequence* pS1Base(&s1); const sequence* pS2Base(&s2); const alphabet* alph = s1.getAlphabet(); // if s1 and contains mulAlphabet const mulAlphabet* mulAlph = dynamic_cast(alph); if (mulAlph!=NULL) { pS1Base = new sequence(s1,mulAlph->getBaseAlphabet()); pS2Base = new sequence(s2,mulAlph->getBaseAlphabet()); } int alphabetSize = pS1Base->getAlphabet()->size(); // const MDOUBLE MAXDISTANCE=2.0; const MDOUBLE MAXDISTANCE=15; MDOUBLE p =0; MDOUBLE len=0.0; if (weights == NULL) { for (int i = 0; i < pS1Base->seqLen() ; ++i) { if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data. len+=1.0; if ((*pS1Base)[i] != (*pS2Base)[i]) p++; } if (len==0) p=1; else p = p/len; } else { for (int i = 0; i < pS1Base->seqLen() ; ++i) { if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data. len += (*weights)[i]; if ((*pS1Base)[i] != (*pS2Base)[i]) p+=((*weights)[i]); } if (len==0) p=1; else { p = p/len; } } if (pS1Base != &s1) { delete pS1Base; delete pS2Base; } const MDOUBLE inLog = 1 - (MDOUBLE)alphabetSize*p/(alphabetSize-1.0); if (inLog<=0) { // LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG"); // LOG(6,<<" p="< * weights, MDOUBLE* score=NULL) const {//score is not used here // const MDOUBLE MAXDISTANCE=2.0; const MDOUBLE MAXDISTANCE=15; MDOUBLE p =0; MDOUBLE len=0.0; if (weights == NULL) { for (int i = 0; i < s1.seqLen() ; ++i) { //if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data. len+=1.0; if (s1[i] != s2[i]) p++; } if (len==0) p=1; else p = p/len; } else { for (int i = 0; i < s1.seqLen() ; ++i) { //if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data. len += (*weights)[i]; if (s1[i] != s2[i]) p+=((*weights)[i]); } if (len==0) p=1; else { p = p/len; } } const MDOUBLE inLog = 1 - (MDOUBLE)_alphabetSize*p/(_alphabetSize-1.0); if (inLog<=0) { // LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG"); // LOG(6,<<" p="<