From 6a2e8153786f76e24724b6f125a5a0290db1bbde Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:05:28 -0500 Subject: [PATCH] Updating header in 7b_SummaryStats.py --- PTL1/Transcriptomes/Scripts/7b_SummaryStats.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/PTL1/Transcriptomes/Scripts/7b_SummaryStats.py b/PTL1/Transcriptomes/Scripts/7b_SummaryStats.py index 673a300..0d6328b 100644 --- a/PTL1/Transcriptomes/Scripts/7b_SummaryStats.py +++ b/PTL1/Transcriptomes/Scripts/7b_SummaryStats.py @@ -1,3 +1,15 @@ +# Last updated Sept 2023 +# Author: Auden Cote-L'Heureux + +# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files +# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference) +# database and the original input assembled transcripts. It relies on the utility script CUB.py +# to calculate composition statistics (GC content, Effective Number of Codons, etc.). Both +# sequence-level and taxon-level stats are summarized in tab-separated outputs written to the Output folder. +# This script requires that the OG reference database is available as an amino acid fasta file +# in the Databases/db_OG folder with the same file name as the .dmnd file used in script 3. This script +# is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py. + import os, sys import argparse from Bio import SeqIO