From 8487f1d836a02880499ab8d362261dece4e05dfb Mon Sep 17 00:00:00 2001
From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com>
Date: Wed, 19 Mar 2025 09:20:37 -0400
Subject: [PATCH] Updating headers in 6_FilterPartials.py

---
 PTL1/Transcriptomes/Scripts/6_FilterPartials.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/PTL1/Transcriptomes/Scripts/6_FilterPartials.py b/PTL1/Transcriptomes/Scripts/6_FilterPartials.py
index 74fe65c..df3942c 100644
--- a/PTL1/Transcriptomes/Scripts/6_FilterPartials.py
+++ b/PTL1/Transcriptomes/Scripts/6_FilterPartials.py
@@ -5,7 +5,8 @@
 # First, all sequences shorter than 33% or longer than 150% the average length of sequences 
 # from the same OG in the Hook database are removed. Then, for each transcriptomic sample, 
 # all sequences within an OG are compared at the nucleotide level to the sequence with the 
-# highest “score” (defined as k-mer coverage multiplied by length). The script should be run
+# highest “score” (defined as k-mer coverage multiplied by length) using BLAST, and sequences that
+# are 98% identical to this highest-scoring (master) sequence are removed. The script should be run
 # as part of the EukPhylo Part 1 pipeline using the script wrapper.py. It requires that the
 # structure of the 'Output' folder be as output by script 5, and that the Databases/db_OG folder
 # contains a .fasta file containing all amino acid sequences in the OG reference database (Hook)