From f3116ff874faf007b0060a37aead45bcfb226fa1 Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:40:30 -0500 Subject: [PATCH] Updating header in 5b_SummaryStats.py --- PTL1/Genomes/Scripts/5b_SummaryStats.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/PTL1/Genomes/Scripts/5b_SummaryStats.py b/PTL1/Genomes/Scripts/5b_SummaryStats.py index efc0856..f2a9ec1 100644 --- a/PTL1/Genomes/Scripts/5b_SummaryStats.py +++ b/PTL1/Genomes/Scripts/5b_SummaryStats.py @@ -1,3 +1,14 @@ +# Last updated Sept 2023 +# Author: Auden Cote-L'Heureux + +# This script produces both taxon- and sequence-level statistics to describe the ReadyToGo files +# output by PhyloToL Part 1, as well as some OG-level information from the Hook (OG reference) +# database. It relies on the utility script CUB.py to calculate composition statistics (GC content, +# Effective Number of Codons, etc.). Both sequence-level and taxon-level stats are summarized in tab-separated +# outputs written to the Output folder. This script requires that the OG reference database is available as an +# amino acid FASTA file in the Databases/db_OG folder with the same file name as the .dmnd file used in script 4. +# This script is intended to be run as part of the PhyloToL 6 Part 1 pipeline using the script wrapper.py. + import os, sys import argparse from Bio import SeqIO