/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
   ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****

    trimAl v1.4: a tool for automated alignment trimming in large-scale
                 phylogenetics analyses.

    readAl v1.4: a tool for automated alignment conversion among different
                 formats.

    2009-2015 Capella-Gutierrez S. and Gabaldon, T.
              [scapella, tgabaldon]@crg.es

    This file is part of trimAl/readAl.

    trimAl/readAl are free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, the last available version.

    trimAl/readAl are distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>.

   ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
   ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */

#ifndef ALIGNMENT_H
#define ALIGNMENT_H

/* NOTE(review): the seven system #include directives below carry no header
 * name in this copy of the file (the <...> targets appear to have been
 * stripped). They must be restored from the upstream source before this
 * header can compile; the declarations below need at least the headers that
 * provide string and ostream. */
#include
#include
#include
#include
#include
#include
#include

/* Project headers providing statisticsGaps, sequencesMatrix,
 * statisticsConservation and similarityMatrix, which are used by pointer in
 * the alignment class declared in this file. */
#include "statisticsGaps.h"
#include "sequencesMatrix.h"
#include "statisticsConservation.h"
#include "similarityMatrix.h"
#include "defines.h"
#include "utils.h"

/* NOTE(review): a using-directive at header scope leaks namespace std into
 * every translation unit that includes this file. The declarations below
 * rely on it for the unqualified names string and ostream, so it cannot be
 * removed without qualifying those names first. */
using namespace std;

/** \brief Plain bundle of alignment dimensions and data.
 *
 * Returned by value from alignment::removeCols_SeqsAllGaps(void) and passed
 * by pointer to alignment::removeCols_SeqsAllGaps(newValues *) and
 * alignment::fillNewDataStructure(newValues *).
 *
 * The members have no default values: a freshly declared newValues holds
 * indeterminate contents until it is filled in.
 *
 * NOTE(review): ownership of the two pointers is not visible from this
 * header - confirm in the implementation who allocates and who releases
 * them.
 */
struct newValues {
  int residues;      /* presumably the number of residues (columns) - TODO confirm */
  int sequences;     /* presumably the number of sequences (rows) - TODO confirm */
  string *matrix;    /* presumably an array of sequence strings - TODO confirm */
  string *seqsName;  /* presumably an array of sequence names - TODO confirm */
};

/** \brief Class containing an alignment
 *
 * This class stores the alignment. It provides methods
 * to \b clean the alignment and generate the clean alignment.
 * It also provides methods for \b statistics \b calculation and
 * \b statistics \b printing.
*/
class alignment {

    /* ----- Data members (private by default) --------------------------------
     * None of the members below is given an initial value in this header;
     * whatever initialization exists lives in the constructors, which are
     * defined elsewhere. The per-member notes are inferred from names only
     * and are marked as such. */

    /* Alignment dimensions. The documentation of getNumSpecies() refers to
     * sequenNumber as the number of sequences; residNumber is presumably the
     * number of residues per sequence - TODO confirm. */
    int sequenNumber;
    int residNumber;

    /* State flags - presumably read back through isFileAligned() and
     * getReverse(); TODO confirm. */
    bool isAligned;
    bool reverse;

    /* Terminal-gap handling - presumably related to removeOnlyTerminal()
     * and trimTerminalGaps(); TODO confirm. */
    bool terminalGapOnly;
    int left_boundary;
    int right_boundary;

    /* Input / output format codes - presumably what getInputFormat() and
     * getOutputFormat() report; TODO confirm. */
    int iformat;
    int oformat;
    bool shortNames;

    /* Output case and header handling options - meaning not visible here. */
    bool forceCaps;
    bool upperCase;
    bool lowerCase;

    /* Presumably set through setKeepSequencesFlag() and
     * setKeepSeqsHeaderFlag() respectively - TODO confirm. */
    bool keepSequences;
    bool keepHeader;

    string gapSymbol;

    /* Presumably the alignment type reported by getTypeAlignment() -
     * TODO confirm. */
    int dataType;

    /* Presumably set through setWindowsSize(int, int) and
     * setBlockSize(int) - TODO confirm which window is which. */
    int ghWindow;
    int shWindow;
    int blockSize;

    /* Per-sequence arrays. NOTE(review): raw pointers; allocation and
     * release are not visible in this header. */
    string *sequences;
    string *seqsName;
    string *seqsInfo;

    string filename;
    string aligInfo;

    /* Sequences */
    sequencesMatrix *seqMatrix;

    /* Statistics */
    statisticsGaps *sgaps;
    statisticsConservation *scons;

    /* Sequences Identities */
    float **identities;

    /* Sequences Overlaps */
    float **overlaps;

    /* New Info */
    bool oldAlignment;
    int *residuesNumber;
    int *saveResidues;
    int *saveSequences;

  /* NOTE(review): this access specifier is redundant - the members above are
   * already private because this is a class, not a struct. */
  private:

    /* ***** Fill the matrices from the input alignment ***** */
    bool fillMatrices(bool aligned);

    /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Alignment cleaning */
    /* Private helpers, each returning a pointer to an alignment. The
     * parameters are unnamed in these declarations; see the definitions for
     * their meaning. The three cleanByCutValue overloads differ only in
     * their parameter lists. */
    alignment *cleanByCutValue(double, float, const int *, bool);
    alignment *cleanByCutValue(float, float, const float *, bool);
    alignment *cleanByCutValue(double, const int *, float, float, const float *, bool);
    alignment *cleanStrict(int, const int *, float, const float *, bool, bool);
    alignment *cleanOverlapSeq(float, float *, bool);
    /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */

  public:

    /* Constructors */

    /* Default constructor. */
    alignment(void);

    /* Full constructor. All 28 positional parameters are unnamed in this
     * declaration; consult the definition for what each one initializes. */
    alignment(string, string, string *, string *, string *, int, int, int, int, bool, int, int, bool, bool, int, int, bool, bool, int, int, int *, int *, int *, int, int, int, float **, float **);

    /* Overload of operator= (copy assignment).
     * NOTE(review): a destructor and a copy-assignment operator are declared
     * but no copy constructor appears in this class, although the class
     * holds raw pointers - confirm that copy-constructing an alignment is
     * either safe or never done. */
    alignment &operator=(const alignment &);

    /* Destructor */
    ~alignment(void);

    /* Basic operations */

    /** \brief Alignment load method.
     * \param alignmentFile Alignment file name.
     * \return \e true if the load is ok, \e false if the load was wrong
     *         (i.e. the file doesn't exist).
     *
     * Method that loads an alignment from a file.
     */
    bool loadAlignment(char *alignmentFile);

    /** \brief Alignment storing method.
     * \param destFile Destination file name of the alignment.
     * \return \e true if all is ok, \e false if there were errors
     *         (i.e. the file doesn't exist).
     *
     * Method that saves an alignment to a file.
     */
    bool saveAlignment(char *destFile);

    /** \brief Alignment printing method.
     * \return \e true if all is ok, \e false if there were errors.
     *
     * Method that prints an alignment to the standard output.
     */
    bool printAlignment(void);

    /* Alignment trimming. */

    /** \brief Alignment trimming using gap method.
     * \param baseLine base line, security, percentage of columns in the alignment.
     * \param threshold maximum percentage of gaps permitted per column.
     * \return the clean alignment if all is ok, \e NULL if there were errors
     *
     * Method that cleans the alignment using gap method, and generates a new,
     * clean alignment.
     *
     * NOTE(review): the declaration takes three unnamed parameters
     * (float, float, bool) but only two are documented here; the meaning of
     * the bool and the order of the two floats must be confirmed against the
     * definition.
     */
    alignment *cleanGaps(float, float, bool);

    /** \brief Alignment trimming using conservation method.
     * \param baseLine base line, security, percentage of columns in the alignment.
     * \return the clean alignment if all is ok, \e NULL if there were errors
     *
     * Method that cleans the alignment using conservation method, and generates a new,
     * clean alignment.
     *
     * NOTE(review): the declaration takes (float, float, bool) but only one
     * parameter is documented here - confirm the others against the
     * definition.
     */
    alignment *cleanConservation(float, float, bool);

    /* Undocumented in the original header. Takes two bools and returns a
     * pointer to an alignment; presumably a trimming mode that combines
     * several methods - TODO confirm. */
    alignment *cleanCombMethods(bool, bool);

    /* Undocumented in the original header. Takes one bool and returns a
     * pointer to an alignment; presumably drops columns made only of gaps -
     * TODO confirm. */
    alignment *cleanNoAllGaps(bool);

    /** \brief Alignment trimming using gap method and conservation method.
     * \param baseLine base line, security, percentage of columns in the alignment.
     * \param threshold maximum percentage of gaps permitted per column.
     * \param conservationPct minimum conservation percentage permitted in the clean alignment.
     * \return the clean alignment if all is ok, \e NULL if there were errors.
     *
     * Method that cleans the alignment using both gaps method and conservation method, and
     * generates a new, clean alignment.
     *
     * NOTE(review): the trailing bool parameter is not documented, and the
     * order of the three floats is not visible from this declaration.
     */
    alignment *clean(float, float, float, bool);

    /** \brief Alignment trimming using a vector of comparison values.
     * \param threshold minimal comparison value permitted per column.
     * \param vectValue comparison values vector from the alignment to clean.
     * \return the clean alignment if all is ok, \e NULL if there were errors
     *
     * Method that cleans the alignment using vector of comparison values, and generates
     * a new clean alignment.
     *
     * NOTE(review): the declaration takes (float, float, float *, bool);
     * one float and the bool are not documented here.
     */
    alignment *cleanCompareFile(float, float, float *, bool);

    /** \brief Alignment automatic trimming relaxed method.
     * \return the clean alignment if all is ok, \e NULL if there were errors.
     *
     * Method that automatically cleans the alignment using a second slope method between
     * the alignment's length and gaps percentage ratio, and generates a new clean alignment.
     *
     * NOTE(review): the bool parameter is not documented.
     */
    alignment *clean2ndSlope(bool);

    /* The following trimming / selection methods are undocumented in the
     * original header. Each returns a pointer to an alignment except
     * getCutPointClusters, which returns a float. Parameter meanings must be
     * taken from the definitions. */
    alignment *cleanSpuriousSeq(float, float, bool);
    alignment *removeColumns(int *, int, int, bool);
    alignment *removeSequences(int *, int, int, bool);
    alignment *getClustering(float);
    float getCutPointClusters(int);

    /* Statistics calculation */

    /** \brief Basic conservation statistics calculation.
     * \return \e true if all is ok, \e false if there were errors (i.e. there is no
     *         similarity matrix defined in conservation statistics).
     *
     * This method calculates conservation statistics with the previously defined
     * similarity matrix.
     */
    bool calculateConservationStats(void);

    /** \brief Conservation statistics calculation.
     * \param sm similarity matrix used for the statistics calculation.
     * \return \e true if all is ok, \e false if there were errors.
     *
     * This method calculates conservation statistics using the \b sm similarity matrix.
     */
    bool setSimilarityMatrix(similarityMatrix *sm);

    /** \brief Gap statistics calculation.
     * \return \e true if all is ok, \e false if there were errors.
     *
     * This method calculates gap statistics without window calculation
     * (half window value = 0).
     */
    bool calculateGapStats(void);

    /* Output Statistics */

    /** \brief Printing normal gap statistics method.
     *
     * This method prints gap statistics for each column.
     */
    void printStatisticsGapsColumns(void);

    /** \brief Printing accumulated gap statistics method.
     *
     * This method prints accumulated gap statistics.
     */
    void printStatisticsGapsTotal(void);

    /** \brief Printing conservation values method.
     *
     * This method prints the conservation value for each column of the alignment.
     */
    void printStatisticsConservationColumns(void);

    /** \brief Printing accumulated conservation statistics method.
     *
     * This method prints the accumulated number of columns for each conservation
     * value from the alignment.
     */
    void printStatisticsConservationTotal(void);

    /* Undocumented in the original header. */
    void printCorrespondence(void);

    /* Alignment's Info */

    /** \brief Gets the number of sequences in the alignment.
     * \return the alignment's number of sequences (sequenNumber).
     *
     * This method returns the alignment's number of sequences.
     */
    int getNumSpecies(void);

    /** \brief Gets the alignment's sequence names.
     * \param Names array of strings used to store the sequence names.
     *
     * This method returns the alignment's sequence names through its
     * argument.
     *
     * NOTE(review): the two further overloads below take additional output
     * arrays (int *, and string * plus int *) that are not documented;
     * the caller presumably has to provide arrays large enough for every
     * sequence - confirm against the definitions.
     */
    void getSequences(string *);
    void getSequences(string *, int *);
    void getSequences(string *, string *, int *);

    /* Undocumented in the original header. */
    bool getSeqNameOrder(string *, int *);

    /** \brief Gets alignment's amino acids number.
     * \return the alignment's amino acids number.
     *
     * This method returns the alignment's amino acids number.
     */
    int getNumAminos(void);

    /* Alignments' Compare */

    /** \brief Building alignment's sequence matrix method.
     *
     * This method builds an alignment's sequence matrix.
     */
    void sequenMatrix(void);

    /* Undocumented in the original header; presumably the counterpart of
     * sequenMatrix() that releases the matrix - TODO confirm. */
    void destroySequenMatrix(void);

    /** \brief Printing alignment's sequence matrix method.
     *
     * This method prints an alignment's sequence matrix.
     */
    void printSequenMatrix(void);

    /** \brief Returns a column from alignment's sequence matrix.
     * \param column sequence matrix index.
     * \param columnSeqMatrix vector used to store a column from the alignment's
     *        sequence matrix.
     *
     * This method returns a column from the alignment's sequence matrix.
     */
    void getColumnSeqMatrix(int, int *);

    /** \brief Returns a column from alignment's sequence matrix.
     * \param value value to look for in a sequence matrix row.
     * \param row sequence matrix row in which to look for the value.
     * \param columnSeqMatrix vector used to store a column from the alignment's
     *        sequence matrix.
     *
     * Method that returns a column from the alignment's sequence matrix with the same
     * value as "value" at the matrix's position (row, i)
     */
    void getColumnSeqMatrix(int, int, int *);

    /* Undocumented in the original header. */
    void setSeqMatrixOrder(int *);

    /* Accessor returning a sequencesMatrix pointer; presumably the seqMatrix
     * member - TODO confirm. */
    sequencesMatrix *getSeqMatrix(void);

    /* ********** NEW CODE ********** */
    /* ********** ******** ********** */
    /* Input format detection. Both return an int whose meaning (format or
     * type code) is not visible in this header. */
    int formatInputAlignment(char *);
    int typeInputFile(void);

    /* Format-specific loaders. Each takes a char * (presumably the input
     * file name, as in loadAlignment) and returns bool. */
    bool loadPhylipAlignment(char *);
    bool loadFastaAlignment(char *);
    bool loadClustalAlignment(char *);
    bool loadNexusAlignment(char *);
    bool loadMegaInterleavedAlignment(char *);
    bool loadMegaNonInterleavedAlignment(char *);
    bool loadNBRF_PirAlignment(char *);
    bool loadPhylip3_2Alignment(char *);
    /* ********** ******** ********** */

    /* ********** ******** ********** */
    /* Alignment to a stream */
    /* Despite the "ToFile" suffix, each writer takes an ostream reference
     * rather than a file name. */
    void alignmentClustalToFile(ostream &);
    void alignmentNBRF_PirToFile(ostream &);
    void alignmentFastaToFile(ostream &);
    void alignmentPhylip3_2ToFile(ostream &);
    void alignmentPhylipToFile(ostream &);
    void alignmentPhylip_PamlToFile(ostream &);
    void alignmentNexusToFile(ostream &);
    void alignmentMegaToFile(ostream &);

    /* HTML reports; parameters are unnamed and undocumented here. */
    bool alignmentSummaryHTML(char *, int, int, int *, int *, float *);
    bool alignmentColourHTML(ostream &);

    /* Stream overload of getSequences; takes the destination ostream. */
    void getSequences(ostream &);
    /* ********** ******** ********** */

    /* Undocumented in the original header. */
    void saveStatistics(similarityMatrix *sm);
    void saveStatistics(similarityMatrix *, int, int);

    void trimTerminalGaps(bool, int *);

    /* Setters. Presumably they store their arguments in ghWindow/shWindow,
     * blockSize, oformat/shortNames and reverse respectively - TODO confirm. */
    void setWindowsSize(int, int);
    void setBlockSize(int);
    void setOutputFormat(int, bool);
    void setReverse(void);

    /* Getters. Note that getShortNames and getReverse are declared to return
     * int although the similarly named members are bool. */
    int getInputFormat(void);
    int getOutputFormat(void);
    int getShortNames(void);
    int getReverse(void);
    int getBlockSize(void);

    void calculateSeqIdentity(void);

    // New
    void calculateRelaxedSeqIdentity(void);
    // New

    void calculateSeqOverlap(void);
    void printSeqOverlap(void);

    /* Returns an int; the meaning of the returned code is not visible in
     * this header. */
    int selectMethod(void);

    void printSeqIdentity(void);

    bool calculateSpuriousVector(float, float *);

    void checkTypeAlignment(void);
    int getTypeAlignment(void);

    /* Both return int pointers. NOTE(review): whether the caller owns the
     * returned arrays is not visible from this header - confirm before
     * freeing or retaining them. */
    int *getCorrespResidues(void);
    int *getCorrespSequences(void);

    bool isFileAligned(void);

    /* Undocumented in the original header; parameters are unnamed. */
    alignment * getTranslationCDS(int, int, int *, string *, sequencesMatrix *, alignment *);
    bool checkCorrespondence(string *, int *, int, int);

    /* Returns an int pointer; ownership of the result is not visible here. */
    int *calculateRepresentativeSeq(float maximumIdent);

    /* New code: version 1.4 */
    void computeComplementaryAlig(bool, bool);
    void removeSmallerBlocks(int);
    bool removeOnlyTerminal(void);
    bool removeOnlyTerminal(int, int);

    /* Two overloads: one returns a newValues by value, the other fills or
     * updates a caller-provided newValues through a pointer. */
    newValues removeCols_SeqsAllGaps(void);
    void removeCols_SeqsAllGaps(newValues *);

    void fillNewDataStructure(string *, string *);
    void fillNewDataStructure(newValues *);

    // New Code: February/2012
    void calculateColIdentity(float *);
    void printColumnsIdentity_DescriptiveStats(void);

    // New Code: May/2012
    void setKeepSequencesFlag(bool);

    // New Code: Mar/2013
    void setKeepSeqsHeaderFlag(bool);

    void printAlignmentInfo(ostream &);

    // Updated: June/2013
    bool prepareCodingSequence(bool, bool, alignment *);

};

#endif