From 4dd54cf87a4df9af13983c430187442127b60e2a Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Sat, 20 Jan 2024 13:57:29 -0500 Subject: [PATCH] Headers in 4_InFrameStopCodonEstimator.py --- .../Scripts/4_InFrameStopCodonEstimator.py | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py b/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py index e5def97..d707dd0 100644 --- a/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py +++ b/PTL1/Transcriptomes/Scripts/4_InFrameStopCodonEstimator.py @@ -1,32 +1,13 @@ -#!/usr/bin/env python - -##__Updated__: 18_08_2017 -##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com -##__Usage__: python 4_InFrameStopFreq.py --help - - -########################################################################################## -## This script is intended to aid in identifying the genetic code of the data given ## -## ## -## Prior to running this script, ensure the following: ## -## ## -## 1. You have assembled your transcriptome and COPIED the 'assembly' file ## -## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ## -## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ## -## 3. Removed SSU/LSU sequences from your Fasta File ## -## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined ## -## 5. 
Classified the Non-Strongly Prokaryotic sequences into OGs ## -## ## -## COMMAND Example Below ## -## Extra Notes at Bottom of Script ## -## ## -## E-mail Xyrus (author) for help if needed: maurerax@gmail.com ## -## ## -## Next Script(s) to Run: ## -## 5_GCodeTranslate.py ## -## ## -########################################################################################## +# Last updated Sept 2023 +# Authors: Xyrus Maurer-Alcala and Auden Cote-L'Heureux +# This script is intended to aid in identifying the genetic code of assembled +# transcripts by similarity searching against a reference database of representative +# sequences (Databases/RepEukProts) and calculating and reporting in-frame stop codon +# frequencies in all reading frames; it then reports these frequencies in a spreadsheet +# (gcodes_output.tsv) for the user to inspect in deciding which genetic codes to use, +# if unsure. This step can be skipped if genetic codes were input from the beginning. This +# script should be run through the PhyloToL 6 Part 1 pipeline using the script wrapper.py. import argparse, os, sys from argparse import RawTextHelpFormatter,SUPPRESS