Headers in 4_InFrameStopCodonEstimator.py

2025-12-27 12:30:25 +08:00 · 2024-01-20 13:57:29 -05:00 · 2024-01-20 13:57:29 -05:00 · 4dd54cf87a
commit 4dd54cf87a
parent 9b02c0074c
1 changed files with 9 additions and 28 deletions
--- a/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py
+++ b/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py
@@ -1,32 +1,13 @@
-#!/usr/bin/env python
+# Last updated Sept 2023
-
+# Authors: Xyrus Maurer-Alcala and Auden Cote-L'Heureux
 ##__Updated__: 18_08_2017
 ##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
 ##__Usage__: python 4_InFrameStopCodonEstimator.py --help
 ##########################################################################################
 ## This script is intended to aid in identifying the genetic code of the data given		##
 ##																						##
 ## Prior to running this script, ensure the following:									##
 ##																						##
 ## 1. You have assembled your transcriptome and COPIED the 'assembly' file 				##
 ##    (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder					##
 ## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py	##
 ## 3. Removed SSU/LSU sequences from your Fasta File									##
 ## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined		##
 ## 5. Classified the Non-Strongly Prokaryotic sequences into OGs 						##
 ##																						##
 ## 								COMMAND Example Below									##
 ##							Extra Notes at Bottom of Script								##
 ##																						##
 ## 			E-mail Xyrus (author) for help if needed: maurerax@gmail.com				##
 ##																						##
 ##								Next Script(s) to Run: 									##
 ##							 	 5_GCodeTranslate.py									##
 ##																						##
 ##########################################################################################
 # This script is intended to aid in identifying the genetic code of assembled
 # transcripts by similarity searching against a reference database of representative
 # sequences (Databases/RepEukProts) and calculating and reporting in-frame stop codon
 # frequencies in all reading frames; it then reports these frequencies in a spreadsheet
 # (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use,
 # if unsure. This step can be skipped if genetic codes were input from the beginning. This
 # script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py.
 import argparse, os, sys
 from argparse import RawTextHelpFormatter,SUPPRESS