mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-28 05:30:30 +08:00
190 lines
7.5 KiB
C++
190 lines
7.5 KiB
C++
// $Id: someUtil.h 11905 2013-12-26 10:12:03Z itaymay $
|
|
|
|
#ifndef ___SOME_UTIL_H
|
|
#define ___SOME_UTIL_H
|
|
|
|
#include "logFile.h"
|
|
#include "definitions.h"
|
|
#include "alphabet.h"
|
|
#include <string>
|
|
#include <iostream>
|
|
using namespace std;
|
|
|
|
//to be used for orderVec
|
|
template <class T>
|
|
class vecElem
|
|
{
|
|
public:
|
|
vecElem();
|
|
virtual ~vecElem() {};
|
|
void setValue(const T val) {m_value = val;}
|
|
T getValue() {return m_value;}
|
|
void setPlace(const int place) {m_place = place;}
|
|
int getPlace() {return m_place;}
|
|
inline bool operator< (const vecElem& elemIn) const;
|
|
private:
|
|
int m_place;
|
|
T m_value;
|
|
};
|
|
|
|
|
|
template <class T>
|
|
vecElem< T >::vecElem()
|
|
{
|
|
m_value = -1;
|
|
m_place = -1;
|
|
}
|
|
|
|
//template <class T>
|
|
//vecElement< T >::~vecElement()
|
|
//{
|
|
//}
|
|
template <class T>
|
|
bool vecElem< T >::operator<(const vecElem& elemIn) const
|
|
{
|
|
if (m_value == elemIn.m_value)
|
|
return (m_place < elemIn.m_place);
|
|
else
|
|
return (m_value < elemIn.m_value);
|
|
}
|
|
|
|
|
|
|
|
// STATISTICAL UTILITIES:
|
|
|
|
MDOUBLE computeAverage(const vector<int>& vec);
|
|
MDOUBLE computeAverage(const vector<MDOUBLE>& vec, const Vdouble* weightsV = NULL);
|
|
MDOUBLE computeAverageOfAbs(const vector<MDOUBLE>& vec, const Vdouble* weightsV = NULL);
|
|
MDOUBLE computeMedian(const vector<MDOUBLE>& vec);
|
|
MDOUBLE computeQuantile(const vector<MDOUBLE>& vec, MDOUBLE quantile);
|
|
MDOUBLE computeQuantileFrac(const vector<MDOUBLE>& vec, MDOUBLE quantile);
|
|
MDOUBLE computeStd(const vector<MDOUBLE>& vec);// page 60, Sokal and Rohlf
|
|
MDOUBLE computeStd(const vector<int>& vec);// page 60, Sokal and Rohlf
|
|
MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda);
|
|
// re-computes a vector of frequencies after one value is changed:
|
|
// all other values are set according to their relative value
|
|
void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs);//freqs is the old vector into which we write the new values
|
|
|
|
// SIMULATIONS:
|
|
int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities);
|
|
int giveRandomState(const int alphabetSize, const Vdouble &frequencies);
|
|
|
|
// TIME UTILITIES
|
|
void printTime(ostream& out);
|
|
|
|
// TEXT UTILITIES
|
|
string int2string(const int i);
|
|
string double2string(const double x, int const howManyDigitsAfterTheDot=5, bool round = false);
|
|
MDOUBLE string2double(const string& inString);
|
|
bool allowCharSet(const string& allowableChars, const string& string2check);
|
|
bool isCharInString(const string& stringToCheck, const char charToCheck);
|
|
void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile);
|
|
|
|
bool fromStringIterToInt(string::const_iterator & it,
|
|
const string::const_iterator endOfString,
|
|
int& res);
|
|
|
|
string takeCharOutOfString(const string& charsToTakeOut, const string& fromString);
|
|
void toLower(string& str);
|
|
void toUpper(string& str);
|
|
string toUpper2(const string& str);
|
|
//splits the string to substr according to the given delimiter (parallel to split in perl)
|
|
void splitString(const string& str,vector<string>& subStrs,const string& delimiter);
|
|
//input: a list of INTs seperated by commas ("1,3,5") returns the int in the vector
|
|
Vint getVintFromStr(const string& str);
|
|
//return a list of INTs seperated by commas ("1,3,5")
|
|
string getStrFromVint(const Vint& inVec);
|
|
|
|
// FILE UTILITIES
|
|
bool checkThatFileExist(const string& fileName);
|
|
string* searchStringInFile(const string& string2find,
|
|
const int index,
|
|
const string& inFileName);
|
|
string* searchStringInFile(const string& string2find,
|
|
const string& inFileName);
|
|
bool doesWordExistInFile(const string& string2find,const string& inFileName);
|
|
void createDir(const string& curDir,const string& dirName);
|
|
|
|
|
|
//BIT UTILITIES
|
|
//void nextBit(bitset<64> &cur);
|
|
|
|
//ARITHMETIC UTILITIES
|
|
//DEQUAL: == UP TO EPSILON
|
|
//DBIG_EQUAL: >= UP TO EPSILON
|
|
//DSMALL_EQUAL: <= UP TO EPSILON
|
|
bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // epsilon taken from WINDOW'S FILE FLOAT.H
|
|
bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F);
|
|
bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // {return ((x1 < x2) || DEQUAL(x1, x2));}
|
|
|
|
//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
|
|
//used in functoin mnbrack below.
|
|
void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d);
|
|
|
|
|
|
// print vector and VVdoulbe util
|
|
ostream &operator<<(ostream &out, const Vdouble &v);
|
|
ostream &operator<<(ostream &out, const VVdouble &m);
|
|
void mult(Vdouble& vec, const MDOUBLE factor);
|
|
void mult(VVdouble& vec, const MDOUBLE factor);
|
|
//scale vecToScale so that its new average is AvgIn. return the scaling factor.
|
|
MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn);
|
|
//determine the relative order of vecIn. The order vector is returned
|
|
//ex: vecIn = [0.1 0.4 0.01 0.9 1.8] orderVecOut = [1 2 0 3 4]
|
|
MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut);
|
|
|
|
void orderRankNoTies(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut);
|
|
|
|
//in this version orderVecOut does not preserv the same order as vecIn.
|
|
//orderVecOut[0] cotains the lowest score and it is stored in orderVecOut[0].getValue()
|
|
//The place in the original vector is stored in orderVecOut[0].getPlace()
|
|
void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut);
|
|
|
|
//calculates the spearman rank correlation value
|
|
MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
|
|
//calculates the spearman rank correlation value, Ofir implementation
|
|
MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
|
|
|
|
MDOUBLE calcCoVariance(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
|
|
MDOUBLE calcPearsonCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec, const int numberOfSignificantDigits=5);
|
|
|
|
MDOUBLE computeFDRthreshold(Vdouble& sortedPVals, MDOUBLE levelOfFDRcontroled, bool isPValsSorted=false);
|
|
|
|
|
|
MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
|
|
MDOUBLE calcMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues);
|
|
//MAD = mean absolute deviations distance
|
|
MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
|
|
MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
|
|
MDOUBLE sumVdouble(const Vdouble & vec);
|
|
|
|
/* Will split a string into 2 by the given seperator
|
|
Example for usage:
|
|
string a, b, c;
|
|
a.assign("Hello world!");
|
|
splitString2(a, " ", b, c);
|
|
cout << "b = " << b << endl << "c = " << c << endl;
|
|
//b == Hello
|
|
//c == world!
|
|
*/
|
|
void splitString2(string str, string seperater, string &first, string &second);
|
|
|
|
// used for gainLoss project
|
|
int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories);
|
|
int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories);
|
|
|
|
int sign(MDOUBLE r);
|
|
MDOUBLE factorial(int x);
|
|
MDOUBLE BinomialCoeff(int a, int b);
|
|
int round2int(MDOUBLE num);
|
|
|
|
//This function does: ln(e**(valuesVec[0])+e**(valuesVec[1])+..e**(valuesVec[n]))
|
|
//Which is: ln(e**(valuesVec[x]))*(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x])))
|
|
//Which is: valuesVec[x]+ln(1+sum_over_i_leave_x(e**(valuesVec[i]-valuesVec[x])))
|
|
//Where: x is the index of the largest element in valuesVec and every valuesVec[i] which is really small should be neglected in order to avoid underflow
|
|
MDOUBLE exponentResolver(Vdouble& valuesVec);
|
|
|
|
|
|
#endif
|
|
|