From 4dd54cf87a4df9af13983c430187442127b60e2a Mon Sep 17 00:00:00 2001
From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com>
Date: Sat, 20 Jan 2024 13:57:29 -0500
Subject: [PATCH] Headers in 4_InFrameStopCodonEstimator.py

---
 .../Scripts/4_InFrameStopCodonEstimator.py    | 37 +++++--------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py b/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py
index e5def97..d707dd0 100644
--- a/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py
+++ b/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py
@@ -1,32 +1,13 @@
-#!/usr/bin/env python
-
-##__Updated__: 18_08_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 4_InFrameStopFreq.py --help
-
-
-##########################################################################################
-## This script is intended to aid in identifying the genetic code of the data given		##
-##																						##
-## Prior to running this script, ensure the following:									##
-##																						##
-## 1. You have assembled your transcriptome and COPIED the 'assembly' file 				##
-##    (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder					##
-## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py	##
-## 3. Removed SSU/LSU sequences from your Fasta File									##
-## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined		##
-## 5. Classified the Non-Strongly Prokaryotic sequences into OGs 						##
-##																						##
-## 								COMMAND Example Below									##
-##							Extra Notes at Bottom of Script								##
-##																						##
-## 			E-mail Xyrus (author) for help if needed: maurerax@gmail.com				##
-##																						##
-##								Next Script(s) to Run: 									##
-##							 	 5_GCodeTranslate.py									##
-##																						##
-##########################################################################################
+# Last updated Sept 2023
+# Authors: Xyrus Maurer-Alcala and Auden Cote-L'Heureux
 
+# This script is intended to aid in identifying the genetic code of assembled
+# transcripts by similarity searching against a reference database of representative
+# sequences (Databases/RepEukProts) and calculating in-frame stop codon
+# frequencies in all reading frames; it then reports these frequencies in a spreadsheet
+# (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use,
+# if unsure. This step can be skipped if genetic codes were input from the beginning. This
+# script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
 
 import argparse, os, sys
 from argparse import RawTextHelpFormatter,SUPPRESS