From 8487f1d836a02880499ab8d362261dece4e05dfb Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:20:37 -0400 Subject: [PATCH] Updating headers in 6_FilterPartials.py --- PTL1/Transcriptomes/Scripts/6_FilterPartials.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PTL1/Transcriptomes/Scripts/6_FilterPartials.py b/PTL1/Transcriptomes/Scripts/6_FilterPartials.py index 74fe65c..df3942c 100644 --- a/PTL1/Transcriptomes/Scripts/6_FilterPartials.py +++ b/PTL1/Transcriptomes/Scripts/6_FilterPartials.py @@ -5,7 +5,8 @@ # First, all sequences shorter than 33% or longer than 150% the average length of sequences # from the same OG in the Hook database are removed. Then, for each transcriptomic sample, # all sequences within an OG are compared at the nucleotide level to the sequence with the -# highest “score” (defined as k-mer coverage multiplied by length). The script should be run +# highest “score” (defined as k-mer coverage multiplied by length) using BLAST, and sequences that +# are 98% identical to the master sequence are removed. The script should be run # as part of the EukPhylo Part 1 pipeline using the script wrapper.py. It requires that the # structure of the 'Output' folder be as output by script 5, and that the Databases/db_OG folder # contains a .fasta file containing all amino acid sequences in the OG reference database (Hook)