diff --git a/PTL2/Scripts/concatenate.py b/PTL2/Scripts/concatenate.py index befc325..8dd161d 100644 --- a/PTL2/Scripts/concatenate.py +++ b/PTL2/Scripts/concatenate.py @@ -1,12 +1,12 @@ # Last updated Jan 2024 # Authors: Auden Cote-L'Heureux and Mario Ceron-Romero -# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end PhyloToL run, +# This script chooses orthologs to concatenate OGs. This can be done as part of an end-to-end EukPhylo run, # or by inputting already complete alignments and gene trees and running only the concatenation step. # Use the --concatenate flag to run this step, and optionally use the argument --concat_target_taxa to input # a file containing a list of taxon codes to be included in the concatenated alignment. If a GF has more # than one sequence from a taxon, a representative ortholog must be chosen to include in the concatenated alignment. -# To do this, for each taxon PhyloToL keeps only the sequences falling in the monophyletic clade in the tree +# To do this, for each taxon EukPhylo keeps only the sequences falling in the monophyletic clade in the tree # that contains the greatest number of species of the taxon’s minor clade (or major clade, if the ‘target taxon list’ # uses major-clade codes). If multiple sequences from the taxon fall into this largest clade, then the sequence # with the highest ‘score’ (defined as length times k-mer coverage for transcriptomic data with k-mer coverage