#include "codonUtils.h"
#include "numRec.h"
#include <algorithm>  // sort() is used further down in this file

// NOTE(review): this copy of the file lost every "<...>" span during
// extraction. Template arguments, cast targets and loop bounds below were
// restored from the surrounding code; each non-obvious restoration is marked.
// Confirm against codonUtils.h / the upstream file before merging.

namespace {

// Sum over all rate categories of ratesProb(category) * sumPijQij() of that
// category's replacement model. Shared by getMatricesNormalizationFactor()
// and normalizeMatrices(), which previously duplicated this loop.
// NOTE(review): `stochasticProcess` and `wYangModel*` are restored tokens;
// presumably the model type exposing sumPijQij()/norm() - confirm.
MDOUBLE weightedSumPijQij(vector<stochasticProcess> & spVec, const distribution * forceDistr){
    MDOUBLE total = 0.0;
    for (int categor = 0; categor < forceDistr->categories(); ++categor) {
        total += forceDistr->ratesProb(categor)
               * static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();
    }
    return total;
}

} // anonymous namespace

// Checks that every input sequence has a length divisible by 3.
// Reads the unaligned sequences in `codonFile` using the nucleotide alphabet
// and reports an error (via errorMsg::reportError) for each sequence whose
// length is not a multiple of three.
void checkInputSeqLength(string codonFile){
    nucleotide alph;
    ifstream in(codonFile.c_str());
    sequenceContainer inputSc = recognizeFormat::readUnAligned(in, &alph);
    in.close();

    for (int i = 0; i < inputSc.numberOfSeqs(); ++i){
        const int seqLen = inputSc[i].seqLen();
        if ((seqLen % 3) != 0){
            string textToPrint = "USER ERROR: unable to read sequence: " + inputSc[i].name() + "\nSequence length is not divisable by three";
            errorMsg::reportError(textToPrint);
        }
    }
}

// Converts codon sequences to amino-acid sequences.
// For every sequence in `codonSc` a new sequence with the same name, remark
// and id is built over the amino alphabet, each codon being translated with
// codonUtility::aaOf() under `*codonAlph`. Returns the container of
// translated sequences.
sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph){
    amino aaAlph;
    sequenceContainer aaSc;
    for (int i = 0; i < codonSc.numberOfSeqs(); ++i){
        sequence codonSeq = codonSc[i];
        sequence aaSeq("", codonSeq.name(), codonSeq.remark(), codonSeq.id(), &aaAlph);
        for (int pos = 0; pos < codonSeq.seqLen(); ++pos)
            aaSeq.push_back(codonUtility::aaOf(codonSeq[pos],*codonAlph));
        aaSc.add(aaSeq);
    }
    // Sanity check: one amino sequence must have been produced per codon sequence.
    if (codonSc.numberOfSeqs() != aaSc.numberOfSeqs())
        errorMsg::reportError("RevTrans: number of codon and Amino sequences is not the same");

    return aaSc;
}

// Returns sumPijQij: the sum over rate categories of
// ratesProb(category) * sumPijQij() of that category's model.
// (The previous comment said "returns 1/sumPijQij"; the code returns the sum
// itself, and normalizeMatrices() is the one that applies 1/sumPijQij.)
// Reports an error if the sum is exactly zero.
MDOUBLE getMatricesNormalizationFactor(vector<stochasticProcess> & spVec,const distribution * forceDistr){
    const MDOUBLE sumPijQij = weightedSumPijQij(spVec, forceDistr);
    if (sumPijQij == 0){
        errorMsg::reportError("Error in getMatricesNormalizationFactor - sumPijQij=0");
    }
    return sumPijQij;
}

// Normalizes the Q matrices so that the average rate of substitution = 1:
// every category's model is scaled by 1/sumPijQij, where sumPijQij is the
// rate-probability-weighted sum over all categories.
// Reports an error if the sum is exactly zero.
void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr){
    const MDOUBLE sumPijQij = weightedSumPijQij(spVec, forceDistr);
    if (sumPijQij == 0){
        errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
    }
    for (int categor = 0; categor < forceDistr->categories(); ++categor)
        static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->norm(1/sumPijQij);
}

// Computes codon frequencies as the product of position-specific nucleotide
// frequencies (one frequency vector per codon position 0,1,2, estimated from
// `nucSc`). The frequency mass not covered by the codons of `coAlph`
// (1 - sum, called stopFreq below) is spread evenly over all codons so the
// returned vector sums to 1.
//
// nucSc  - nucleotide sequences; position pos contributes to codon position pos%3.
// coAlph - codon alphabet; size() gives the number of codons, fromInt(c) the
//          three-letter string of codon c.
Vdouble freqCodonF3x4(const sequenceContainer &nucSc, codon * coAlph){
    VVdouble nucFeqPos(3);
    for (int nPos = 0; nPos < 3; nPos++)
        nucFeqPos[nPos].resize(nucSc.alphabetSize(),0.0);

    sequenceContainer::constTaxaIterator tIt;
    sequenceContainer::constTaxaIterator tItEnd;
    tIt.begin(nucSc);
    tItEnd.end(nucSc);
    while (tIt != tItEnd) {
        int pos = 0;
        sequence::constIterator sIt;
        sequence::constIterator sItEnd;
        sIt.begin(*tIt);
        sItEnd.end(*tIt);
        // NOTE(review): everything from the upper-bound test below up to the
        // construction of freqCodon was lost in this copy and is reconstructed
        // from what the rest of the function requires (per-position counts
        // turned into frequencies). Confirm against the upstream file.
        while (sIt != sItEnd) {
            Vdouble & counts = nucFeqPos[pos % 3];
            const int code = *sIt;
            if ((code >= 0) && (code < static_cast<int>(counts.size())))
                ++counts[code];
            // NOTE(review): presumably code 4 is the alternate T/U symbol and
            // is folded into slot 3 - verify against the nucleotide alphabet.
            else if ((code == 4) && (counts.size() > 3))
                ++counts[3];
            ++sIt;
            ++pos;
        }
        ++tIt;
    }

    // Turn the per-position counts into frequencies. An all-zero column
    // (e.g. empty input) is left untouched instead of dividing by zero.
    for (int nPos = 0; nPos < 3; nPos++) {
        Vdouble & counts = nucFeqPos[nPos];
        MDOUBLE total = 0.0;
        for (size_t k = 0; k < counts.size(); ++k)
            total += counts[k];
        if (total > 0.0) {
            for (size_t k = 0; k < counts.size(); ++k)
                counts[k] /= total;
        }
    }

    const int numCodons = coAlph->size();
    Vdouble freqCodon(numCodons,0.0);

    nucleotide n;
    for (int c = 0; c < numCodons; c++){
        string s = coAlph->fromInt(c);
        int nuc0 = n.fromChar(s[0]);
        int nuc1 = n.fromChar(s[1]);
        int nuc2 = n.fromChar(s[2]);
        freqCodon[c] = nucFeqPos[0][nuc0]*nucFeqPos[1][nuc1]*nucFeqPos[2][nuc2];
    }

    MDOUBLE sum = 0;
    for (int i = 0; i < numCodons; i++){
        sum += freqCodon[i];
    }
    // Whatever probability mass is missing from the alphabet's codons is
    // redistributed equally.
    MDOUBLE stopFreq = 1.0 - sum;
    MDOUBLE ep = stopFreq/coAlph->size();
    for (int i = 0; i < numCodons; i++){
        freqCodon[i] += ep;
    }

    return freqCodon;
}


/***********************************************
 The following functions are useful for the selecton server, for creating a
 Rasmol script and for setting the color value of each site
 ***********************************************/

// Positive significant in color dark yellow, non-sig. positive selection - light yellow.
// Purifying selection in shades of bordeaux vector > create7ColorValues(){ vector > colorsValue; colorsValue.resize(7); for (int i=0;i<7;i++) colorsValue[i].resize(3); // RGB values of the differnt color bins colorsValue[0][0] = 255; //yellow positive significant colorsValue[0][1] = 220 ; colorsValue[0][2] = 0; colorsValue[1][0] =255 ; //light yellow - not significant positive selection colorsValue[1][1] = 255; colorsValue[1][2] = 120; //three categories of not significant negative selection according to bordeaux shades (colors like conseq/consurf) colorsValue[2][0] = 255; //white colorsValue[2][1] = 255; colorsValue[2][2] = 255; colorsValue[3][0] = 252; colorsValue[3][1] = 237; colorsValue[3][2] = 244; colorsValue[4][0] = 250; colorsValue[4][1] = 201; colorsValue[4][2] = 222; colorsValue[5][0] = 240; colorsValue[5][1] = 125; colorsValue[5][2] = 171; //significant negative selection colorsValue[6][0] = 130; colorsValue[6][1] = 67; colorsValue[6][2] = 96; return colorsValue; } //this functions creates a rasmol script (assumes positions are the same between the alignment and the PDB) void outToRasmolFile(string fileName,vector& color4Site){ ofstream out(fileName.c_str()); vector > colorsValue = create7ColorValues(); int numberOfColor = colorsValue.size(); vector > colors; //for each color (1-9/3) holds vector of sites. 
colors.resize(numberOfColor+1); int i; for (i=0;inumberOfColor){ errorMsg::reportError("Error in outToColorFile - unknown color"); } colors[color].push_back(i+1); //add site (position in the vector +1) } out<<"select all"< colors; int numOfSitesinAln = kaksVec.size(); Vdouble negativesKaksVec,negativesSite; negativesKaksVec.clear(); negativesSite.clear(); int i,gapsInRefSeq=0; for (i=0;i1) // color 1 (positive selection) : if confidence interval lower bound > 1 colors[i-gap]=1; else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant" colors[i-gap]=2; else { negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1 negativesSite.push_back(i-gap); //add the number of site of the kaks } } // now dealing with purifying selection Vdouble orderVec = negativesKaksVec; if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly) sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups MDOUBLE percentileNum = 5.0; int percentileNumInt = 5; Vdouble maxScoreForPercentile(percentileNumInt); if (orderVec.size()>0) { maxScoreForPercentile[0] = orderVec[0]; for (int c = 1; c < percentileNumInt; ++c){ int place = (int)((c / percentileNum) * negativesKaksVec.size()); MDOUBLE maxScore = orderVec[place]; maxScoreForPercentile[c] = maxScore; } } //loop over all the Ka/Ks < 1 for (int j=0; j < negativesKaksVec.size(); ++j){ MDOUBLE r = negativesKaksVec[j]; //the kaks of the site. int s = (int)negativesSite[j]; //the site. if (r > maxScoreForPercentile[4]) colors[s] = 3; else if (r > maxScoreForPercentile[3]) colors[s] = 4; else if (r> maxScoreForPercentile[2]) colors[s] = 5; else if (r > maxScoreForPercentile[1]) colors[s] = 6; else if (r >= maxScoreForPercentile[0]) colors[s] = 7; } //print to file ofstream out(fileName.c_str()); gap=0; amino aminoAcid; LOG(5,<<"Printing selection color bins to file"<