Delete PTL1/Genomes/Scripts directory

2025-12-27 05:20:24 +08:00 · 2023-06-12 13:30:05 -04:00 · 2023-06-12 13:30:05 -04:00 · 3347fc979a
commit 3347fc979a
parent f77ece5a2a
7 changed files with 0 additions and 1734 deletions
--- a/PTL1/Genomes/Scripts/1_RenameCDS.py
+++ b/PTL1/Genomes/Scripts/1_RenameCDS.py
@ -1,217 +0,0 @@
-#!/usr/bin/env python3.5
-
-##__Updated__: 19_09_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 1g_RenameCDS.py --help
-
-
-from Bio import SeqIO
-from Bio.SeqUtils import GC
-import argparse, os, sys, time
-from argparse import RawTextHelpFormatter,SUPPRESS
-
-
-#----------------------------- Colors For Print Statements ------------------------------#
-class color:
-   PURPLE = '\033[95m'
-   CYAN = '\033[96m'
-   DARKCYAN = '\033[36m'
-   ORANGE = '\033[38;5;214m'
-   BLUE = '\033[94m'
-   GREEN = '\033[92m'
-   YELLOW = '\033[93m'
-   RED = '\033[91m'
-   BOLD = '\033[1m'
-   UNDERLINE = '\033[4m'
-   END = '\033[0m'
-   
-#------------------------------- Main Functions of Script --------------------------------#
-###########################################################################################
-###--------------------- Parses and Checks Command-Line Arguments ----------------------###
-###########################################################################################
-
-def check_args():
-
-	parser = argparse.ArgumentParser(description=
-	color.BOLD + '\n\nThis script is intended to extract '+color.RED+'Annotated '+\
-	color.PURPLE+'ORFS\n'+color.END+color.BOLD+'from a provided Genbank formatted file.'\
-	+usage_msg(), usage=SUPPRESS, formatter_class=RawTextHelpFormatter)
-	
-	required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)
- 
-	required_arg_group.add_argument('--input_file','-in', action='store',
-	help=color.BOLD+color.GREEN+' Fasta file with CDSs\n'+color.END)
-
-	required_arg_group.add_argument('--output_dir','-o', action='store',
-	help=color.BOLD+color.GREEN+' Output directory\n'+color.END)
-
-	optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
-
-	optional_arg_group.add_argument('--source','-s', action='store', default='GenBank',
-	help=color.BOLD+color.GREEN+' Data Source of CDSs (default = "GenBank")\n'+color.END)
-
-	optional_arg_group.add_argument('--list_source','-lsrc', action='store_true',
-	help=color.BOLD+color.GREEN+' Lists supported data sources\n'+color.END)
-
-	optional_arg_group.add_argument('-author', action='store_true',
-	help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
-
-
-	if len(sys.argv[1:]) == 0:
-		print (parser.description)
-		print ('\n')
-		sys.exit()
-
-	args = parser.parse_args()
-	
-	more_info = return_more_info(args)
-	if more_info != None:
-		print (parser.description)
-		print (more_info)
-		sys.exit()
-	
-	args.folder = args.output_dir + '/' + args.input_file.split('/')[-1][:10]
-		
-	return args
-	
-	
-###########################################################################################
-###------------------------------- Script Usage Message --------------------------------###
-###########################################################################################
-
-def usage_msg():
-	return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 1g_RenameCDS.py'\
-	' --input_file ../Stentor_coeruleus.WGS.CDS.Prep/Stentor_coeruleus.WGS.CDS.fasta --source'\
-	' GenBank'+color.END)
-
-
-##########################################################################################
-###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
-##########################################################################################
-
-def return_more_info(args):
-
-	acceptable_sources = ['in-house', 'in-lab', 'GenBank', 'gb', 'NCBI']
-
-	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
-	' maurerax@gmail.com\n\n'+color.END)
-
-	if args.author == True:
-		return author
-
-	if args.list_source == True:
-		print (color.BOLD+color.RED+'\nThese are the currently supported data sources.\n'+color.END)
-		print (color.BOLD+color.ORANGE+'\n'.join(acceptable_sources)+'\n\n'+color.END)
-		sys.exit()
-
-	if args.source.lower() not in [i.lower() for i in acceptable_sources]:
-		print (color.BOLD+color.RED+'\nUnsupported source was provided.\n\nEnsure that '\
-		'you are providing a valid data source (see below).\n'+color.END)
-		print (color.BOLD+color.ORANGE+'\n'.join(acceptable_sources)+'\n'+color.END)
-		sys.exit()
-	
-	if args.input_file != None:
-		if args.input_file.split('/')[-1] not in os.listdir('/'.join(args.input_file.split('/')[:-1])):
-			print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
-			'('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-			' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) 
-			sys.exit()
-	
-
-###########################################################################################
-###--------------------------- Does the Initial Folder Prep ----------------------------###
-###########################################################################################
-
-def prep_folders(args):
-
-	if os.path.isdir(args.folder) != True:
-		os.system('mkdir '+args.folder)
-		os.system('cp '+args.input_file+' '+args.folder)
-		args.input_file = args.folder+'/'+args.input_file.split('/')[-1]
-		
-	if os.path.isdir(args.folder+'/Original') != True:
-		os.system('mkdir '+args.folder+'/Original')
-
-	os.system('cp '+args.input_file+' '+args.folder+'/Original/')
-
-###########################################################################################
-###------------- Renames Protein-Coding CDS Sequences to Standard Format ---------------###
-###########################################################################################
-
-def renamed_GenomeCDS(args):
-		
-	print (color.BOLD+'\n\nPrepping to rename '+color.GREEN+args.input_file.split('/')[-1]+\
-	color.END+color.BOLD+"'s CDS sequences"+color.END)
-	inFasta = sorted((i for i in SeqIO.parse(args.input_file,'fasta')),key=lambda seq_rec: -len(seq_rec.seq))
-
-	renamed_seqs = []
-	seq_code_dict = {}
-
-	count = 1
-	for seq_rec in inFasta:
-		seq_code_dict.setdefault(seq_rec.description,[]).append('Contig_'+str(count)+'_Len'+str(len(seq_rec.seq)))
-		seq_code_dict[seq_rec.description].append(str(seq_rec.seq).upper())
-		renamed_seqs.append('>Contig_' + str(count) + '_Len' + str(len(seq_rec.seq)) + '\n' + str(seq_rec.seq).upper())
-		count += 1
-
-	## keeps only CDSs that are greater than 30 bp (10 AA --> This is a cut-off in the 
-	## phylogenomic pipeline too!)
-	renamed_seqs = [i for i in renamed_seqs if len(i.split('\n')[-1]) > 30]
-	
-	print (color.BOLD+'\n\nFor '+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+\
-	color.BOLD+', '+color.RED+str(len(renamed_seqs))+' CDS sequences\n'+color.END+color.BOLD+
-	'were renamed while preserving the '+color.ORANGE+args.source+color.END+color.BOLD+' formatting'\
-	+color.END+'\n')
-	
-	with open(args.input_file.replace('.fasta','.Prepped.fasta'),'w+') as w:
-		w.write('\n'.join(renamed_seqs))
-
-	with open(args.input_file.split('/')[-1].replace('.fasta','.SeqCodes.tsv'),'w+') as w:
-		w.write('Original Name\tNew Name\tSeq Length\t Seq GC\n')
-		for k, v in seq_code_dict.items():
-			w.write(k+'\t'+v[0]+'\t'+str(len(v[1]))+'\t'+str(GC(v[1]))+'\n')
-
-
-###########################################################################################
-###--------------------- Cleans up the Folder and Moves Final Files --------------------###
-###########################################################################################
-
-def clean_up(args):
-
-#	os.system('rm '+args.input_file)
-	os.system('mv ' + args.input_file.split('/')[-1].replace('.fasta','.SeqCodes.tsv') + ' ' + args.folder + '/Original/')
-	os.system('mv ' + args.input_file + ' ' + args.folder + '/Original/')
-
-
-###########################################################################################
-###-------------------------------- Next Script Message --------------------------------###
-###########################################################################################
-
-def next_script(args):
-
-	print (color.BOLD+'\nLook for '+color.DARKCYAN+args.input_file.split('/')[-1].replace('.fasta','.Renamed.fasta')\
-	+'.fasta'+color.END+color.BOLD+'\nin the '+color.ORANGE+args.folder.split('/')[-1]+\
-	' Folder\n\n'+color.END+color.BOLD)
-
-	print ('Next Script(s) are:\n\n'+color.PURPLE+'2g_GCodeEval.py'+color.END+color.BOLD\
-	+' (if Genetic Code is '+color.RED+'Unknown'+color.END+color.BOLD+')\n\nOtherwise:\n\n'+\
-	color.PURPLE+'3g_GCodeTranslate.py\n\n'+color.END)
-	
-	
-##########################################################################################
-###----------------------------- Calls on Above Functions -----------------------------###
-##########################################################################################
-
-def main():
-
-	args = check_args()
-
-	prep_folders(args)
-		
-	renamed_GenomeCDS(args)
-	
-	clean_up(args)
-	
-	next_script(args)
-	
-main()
--- a/PTL1/Genomes/Scripts/2_GCodeEval.py
+++ b/PTL1/Genomes/Scripts/2_GCodeEval.py
@ -1,252 +0,0 @@
-#!/usr/bin/env python3.5
-
-##__Updated__: 19_09_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 2g_GCodeEval.py --help
-
-
-#############################################################################################
-#                                                                                           #
-# Suggests which Genetic Code to use based upon Presence/Absence of Specific Stop Codons    #
-# at the end of the CDS sequences. This is to provide a ROUGH gauge for the user.           #
-#                                                                                           #
-#############################################################################################
-
-
-import argparse, os, sys
-from argparse import RawTextHelpFormatter,SUPPRESS
-from Bio import SeqIO
-from Bio.Seq import Seq
-
-#----------------------------- Colors For Print Statements ------------------------------#
-class color:
-   PURPLE = '\033[95m'
-   CYAN = '\033[96m'
-   DARKCYAN = '\033[36m'
-   ORANGE = '\033[38;5;214m'
-   BLUE = '\033[94m'
-   GREEN = '\033[92m'
-   YELLOW = '\033[93m'
-   RED = '\033[91m'
-   BOLD = '\033[1m'
-   UNDERLINE = '\033[4m'
-   END = '\033[0m'
-
-
-#------------------------------- Main Functions of Script --------------------------------#
-
-###########################################################################################
-###------------------------- Checks the Command Line Arguments -------------------------###
-###########################################################################################
-
-def check_args():
-
-	parser = argparse.ArgumentParser(description=
-	color.BOLD + '\n\nThis script is intended to aid you with '+color.RED+'evaluating\n(or checking) '+\
-	color.END+color.BOLD+'the putative '+color.PURPLE+'Genetic Code'+color.END+color.BOLD+\
-	' for a given\nFasta file of annotated (and untranslated) CDSs.\n\nTo do so, this script'\
-	' checks for stop codon usages,\n'+color.RED+'suggesting '+color.END+color.BOLD+'the use of'\
-	+color.PURPLE+' published and well-known\nalternate genetic codes'+color.END+color.BOLD+\
-	' that are supported by the\nnext script: '+color.END+color.BOLD+color.PURPLE+'3g_GCodeTranslate.py'\
-	+usage_msg(), usage=SUPPRESS, formatter_class=RawTextHelpFormatter)
-
-	
-	required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)
- 
-	required_arg_group.add_argument('--input_file','-in', action='store',
-	help=color.BOLD+color.GREEN+' Fasta file with CDSs\n'+color.END)
-
-	optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
-
-	optional_arg_group.add_argument('--list_codes','-codes', action='store_true',
-	help=color.BOLD+color.GREEN+' Lists supported genetic codes\n'+color.END)
-
-	optional_arg_group.add_argument('-author', action='store_true',
-	help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
-
-
-	if len(sys.argv[1:]) == 0:
-		print (parser.description)
-		print ('\n')
-		sys.exit()
-
-	args = parser.parse_args()
-	
-	quit_eval = return_more_info(args)
-	if quit_eval > 0:
-		sys.exit()
-		
-	args.folder = '/'.join(args.input_file.split('/')[:-1])
-		
-	return args
-	
-	
-###########################################################################################
-###------------------------------- Script Usage Message --------------------------------###
-###########################################################################################
-
-def usage_msg():
-	return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 2g_GCodeEval.py'\
-	' --input_file ../Stentor_coeruleus.WGS.CDS.Prep/Stentor_coeruleus.WGS.CDS.Renamed.fasta'+color.END)
-
-
-##########################################################################################
-###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
-##########################################################################################
-
-def return_more_info(args):
-
-	valid_arg = 0
-
-	supported_gcodes = ['Blepharisma\t(TGA = W)','Chilodonella\t(TAG/TGA = Q)','Ciliate\t\t(TAR = Q)',\
-	'Conylostoma\t(TAR = Q, TGA = W)','Euplotes\t(TGA = C)','Peritrich\t(TAR = E)','None\t\t(TGA/TAG/TAA = X)',\
-	'Universal\t(TGA/TAG/TAA = STOP)','TAA\t\t(TAG/TGA = Q)', 'TAG\t\t(TRA = Q)', 'TGA\t\t(TAR = Q)']
-
-	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
-	' maurerax@gmail.com\n\n'+color.END)
-
-	if args.list_codes == True:
-		print (color.BOLD+color.RED+'\nThese are the currently supported genetic codes.\n'+color.END)
-		print (color.BOLD+color.ORANGE+'\n'.join(supported_gcodes)+'\n\n'+color.END)
-		valid_arg += 1	
-
-	if args.author == True:
-		print (author)
-		valid_arg += 1
-
-	print(args.input_file.split('/')[-1], '/'.join(args.input_file.split('/')[:-1]))
-
-
-	if args.input_file != None:
-		if os.path.isfile(args.input_file) != False:
-
-			if args.input_file.split('/')[-1] not in os.listdir('/'.join(args.input_file.split('/')[:-1])):
-				print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
-				'('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-				' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) 
-				valid_arg += 1
-		else:
-			print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
-			'('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-			' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) 
-			valid_arg += 1
-			
-	return valid_arg
-	
-
-###########################################################################################
-###-------------------- Counts Several Metrics of Stop Codon Usage ---------------------###
-###########################################################################################
-
-def count_stops(args):
-
-	print (color.BOLD+'\n\nScanning CDSs for In-Frame Stop Codons and Tracking\nFINAL '\
-	'(Terminal) stop codon usage\n\n'+color.END)
-	
-	inFasta = [i for i in SeqIO.parse(args.input_file,'fasta')]
-	seq_ends = [str(i.seq)[-3:].lower() for i in inFasta]
-	inFrame_stops_raw = [str(i.seq[:-3].translate()).count('*') for i in inFasta]
-	inFrame_stops_summary = [i for i in inFrame_stops_raw if i != 0]
-				
-	tga_end = seq_ends.count('tga')
-	tag_end = seq_ends.count('tag')
-	taa_end = seq_ends.count('taa')
-	
-	end_stop_freq = [tga_end, tag_end, taa_end]
-	
-	if max(end_stop_freq) > 0.95*sum(end_stop_freq):
-		pos_to_keep = [i for i, j in enumerate(end_stop_freq) if j == max(end_stop_freq)][0]
-	try:
-		if pos_to_keep == 0:
-			end_stop_freq = [end_stop_freq[0],0,0]
-		elif pos_to_keep == 1:
-			end_stop_freq = [0,end_stop_freq[1],0]
-		elif pos_to_keep == 2:
-			end_stop_freq = [0,0,end_stop_freq[2]]
-	except:
-		pass
-		
-	inFrame_stop_info = [len(inFrame_stops_summary), int(round(len(inFrame_stops_raw)*0.05)), sum(inFrame_stops_summary)]
-	return end_stop_freq, inFrame_stop_info
-
-
-###########################################################################################
-###-------------------- Suggests Genetic Code Given Stop Codon Usage -------------------###
-###########################################################################################
-
-def suggest_code(args):
-
-	stop_freq, inFrames = count_stops(args)
-
-	genetic_code = ''
-
-	if stop_freq.count(0) == 3:
-		print (color.BOLD + color.RED + '\n\nNO Stop Codons Present in Data-set\n\n')
-		genetic_code = 'None (UNDETERMINED -- NO STOP CODONS)'
-	else:
-	## DUMB way of checking if there are a significant (> 5%) number of CDSs with IN-FRAME stop codons 
-		if inFrames[0] < inFrames[1]:
-			print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' Universal (table = 1)'+color.END)
-			genetic_code = 'Universal (table = 1)'	
-		else:
-		
-			if stop_freq[0] != 0 and stop_freq[1] != 0 and stop_freq[2] != 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' Condylostoma-Code'\
-				' (No Dedicated Stops) OR None (all stops = "X")'+color.END)
-				genetic_code = 'Condylostoma or None'
-			if stop_freq[0] == 0 and stop_freq[1] == 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' Chilodonella-Code'\
-				+' (Only Stop = TAA)'+color.END)
-				genetic_code = 'Chilodonella or TAA'
-			if stop_freq[0] == 0 and stop_freq[2] == 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' TAG-Code'\
-				+' (Only Stop = TAG)'+color.END)
-				genetic_code = 'TAG'
-			if stop_freq[1] == 0 and stop_freq[2] == 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' Ciliate-Code'\
-				+' (table = 6)'+color.END)
-				genetic_code = 'Ciliate (table = 6)'
-			if stop_freq[0] != 0 and stop_freq[1] != 0 and stop_freq[2] == 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' TGA/TAG are STOP'+color.END)
-				genetic_code = 'TGA/TAG'
-			if stop_freq[0] != 0 and stop_freq[1] == 0 and stop_freq[2] != 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' TGA/TAA are STOP'+color.END)
-				genetic_code = 'TGA/TAA'
-			if stop_freq[0] == 0 and stop_freq[1] != 0 and stop_freq[2] != 0:
-				print (color.BOLD + '\n\nSuggested Genetic Code is: '+color.CYAN+' Blepharisma/Euplotes-Codes'\
-				+color.END + color.BOLD+'\n--- NOTE: '+color.RED+' Stop-Codon Reassignments'\
-				+' differ! (TGA = W or TGA = C)' + color.END)
-				genetic_code = 'Blepharisma (TGA = W) or Euplotes (TGA = C)'
-
-	return genetic_code, stop_freq
-	
-
-###########################################################################################
-###---------------- Writes Out Currently Crummy Summary of Genetic Codes ---------------###
-###########################################################################################
-
-def summarize(args):
-
-	suggestion, stop_freq = suggest_code(args)
-
-	with open(args.input_file.split('.fa')[0]+'.GeneticCode.txt','w+') as w:
-		w.write('Stop Codon\tFrequency\n')
-		w.write('TGA\t'+str(stop_freq[0])+'\n')
-		w.write('TAG\t'+str(stop_freq[1])+'\n')
-		w.write('TAA\t'+str(stop_freq[2])+'\n\n')
-		w.write('Suggestion For Genetic Code:\t'+suggestion+'\n\n')
-
-
-##########################################################################################
-###--------------- Checks Command Line Arguments and Calls on Functions ---------------###
-##########################################################################################
-		
-def main():
-	
-	args = check_args()
-		
-	summarize(args)
-		
-	print (color.BOLD+'\nNext Script is: '+color.PURPLE+' 3g_GCodeTranslate.py\n\n'+color.END)
-
-main()
--- a/PTL1/Genomes/Scripts/3_GCodeTranslate.py
+++ b/PTL1/Genomes/Scripts/3_GCodeTranslate.py
@ -1,397 +0,0 @@
-#!/usr/bin/env python3.5
-
-##__Updated__: 19_09_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 3g_GCodeTranslate.py --help
-
-
-##############################################################################
-##																			##
-## Translates CDSs sequences using the Provided Genetic Code. 				##
-##																			##
-## NOTE: 																	##
-##		No provided input for genetic code results in Translation with the	##
-## 		UNIVERSAL genetic code (as default)									##
-##																			##
-##		E-mail Xyrus (author) for help if needed: maurerax@gmail.com		##
-##																			##
-##############################################################################
-
-
-import argparse, os, sys
-from argparse import RawTextHelpFormatter,SUPPRESS
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.Data.CodonTable import CodonTable
-
-
-
-#-------------------------- Set-up Codon Tables (Genetic Codes) --------------------------#
-
-# Blepharisma code: TGA is translated as W; TAA and TAG are the stop codons
-# (they are left out of the forward table).
-blepharisma_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y',                       
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TAA','TAG'])
-
-# Condylostoma code: TAA and TAG are translated as Q and TGA as W, so all 64
-# codons are in the forward table.
-# NOTE(review): stop_codons holds a single empty string rather than being an
-# empty list -- presumably a placeholder for "no stop codon"; confirm.
-condylostoma_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Q', 'TAG': 'Q',
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = [''])
-
-# TAG and TGA are translated as Q; TAA is the only stop codon.
-# NOTE(review): presumably the "Chilodonella" code (C. uncinata) listed in the
-# supported genetic codes -- confirm against where this table is selected.
-c_uncinata_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y',             'TAG': 'Q',
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'Q', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TAA'])
-
-# Euplotes code: TGA is translated as C; TAA and TAG are the stop codons
-# (they are left out of the forward table).
-euplotes_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y',                       
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TAA','TAG'])
-
-# Myrionecta code: TAA and TAG are translated as Y; TGA is the only stop codon
-# (it is left out of the forward table).
-myrionecta_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Y', 'TAG': 'Y',
-	'TGT': 'C', 'TGC': 'C',             'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TGA'])
-
-# "None" code: TAA, TAG and TGA are all translated as X, so no codon ends
-# translation.
-# NOTE(review): stop_codons holds a single empty string rather than being an
-# empty list -- presumably a placeholder for "no stop codon"; confirm.
-no_stop_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'X', 'TAG': 'X',
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'X', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = [''])
-
-# Peritrich code: TAA and TAG are translated as E; TGA is the only stop codon
-# (it is left out of the forward table).
-peritrich_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'E', 'TAG': 'E',
-	'TGT': 'C', 'TGC': 'C',             'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TGA'])
-	
-# "TAG" code: TAA and TGA are translated as Q; TAG is the only stop codon
-# (it is left out of the forward table).
-tag_table = CodonTable(forward_table={
-	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Q',            
-	'TGT': 'C', 'TGC': 'C', 'TGA': 'Q', 'TGG': 'W',
-	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
-	start_codons = [ 'ATG'],
-	stop_codons = ['TAG'])
-
-
-#----------------------------- Colors For Print Statements ------------------------------#
-class color:
-   PURPLE = '\033[95m'
-   CYAN = '\033[96m'
-   DARKCYAN = '\033[36m'
-   ORANGE = '\033[38;5;214m'
-   BLUE = '\033[94m'
-   GREEN = '\033[92m'
-   YELLOW = '\033[93m'
-   RED = '\033[91m'
-   BOLD = '\033[1m'
-   UNDERLINE = '\033[4m'
-   END = '\033[0m'
-
-
-#------------------------------- Main Functions of Script --------------------------------#
-
-###########################################################################################
-###------------------------- Checks the Command Line Arguments -------------------------###
-###########################################################################################
-
-def check_args():
-
-	parser = argparse.ArgumentParser(description=
-	color.BOLD + '\n\nThis script will '+color.RED+'Translate '+color.END+color.BOLD+'a '\
-	'given Fasta file of CDS\nsequences using a given'+color.PURPLE+' Genetic Code.'+color.END+\
-	color.BOLD+usage_msg(), usage=SUPPRESS, formatter_class=RawTextHelpFormatter)
-
-	
-	required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)
- 
-	required_arg_group.add_argument('--input_file','-in', action='store',
-	help=color.BOLD+color.GREEN+' Fasta file with CDSs\n'+color.END)
-
-	optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
-
-	optional_arg_group.add_argument('--genetic_code','-g', action='store', default='universal',
-	help=color.BOLD+color.GREEN+' Genetic code to use for translation\n (default = '\
-	'"universal")\n'+color.END)
-
-	optional_arg_group.add_argument('--list_codes','-codes', action='store_true',
-	help=color.BOLD+color.GREEN+' Lists supported genetic codes\n'+color.END)
-
-	optional_arg_group.add_argument('-author', action='store_true',
-	help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
-
-
-	if len(sys.argv[1:]) == 0:
-		print (parser.description)
-		print ('\n')
-		sys.exit()
-
-	args = parser.parse_args()
-	
-	quit_eval = return_more_info(args)
-	if quit_eval > 0:
-		sys.exit()
-		
-	args.folder = '../'+args.input_file.split('/')[1]
-	args.out_name = args.input_file.split('.Prepped')[0]+'.'+args.genetic_code.title()+'.AA.fasta'
-	args.new_ntd_name = args.input_file.split('.Prepped')[0]+'.'+args.genetic_code.title()+'.NTD.fasta'
-	
-	return args
-	
-	
-###########################################################################################
-###------------------------------- Script Usage Message --------------------------------###
-###########################################################################################
-
-def usage_msg():
-	return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 3g_GCodeTranslate.py'\
-	' --input_file ../Stentor_coeruleus.WGS.CDS.Prep/Stentor_coeruleus.WGS.CDS.Prepped.fasta'\
-	' --genetic_code Universal'+color.END)
-
-
-##########################################################################################
-###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
-##########################################################################################
-
-def return_more_info(args):
-	"""Print requested information or argument errors and count them.
-
-	Covers an unsupported ``--genetic_code`` value, the ``--list_codes`` and
-	``-author`` informational flags, and an input Fasta file that cannot be
-	found.
-
-	Returns:
-		The number of messages printed; check_args() exits when it is > 0.
-	"""
-
-	valid_arg = 0
-
-	supported_gcodes_names = ['bleph','blepharisma','chilo','chilodonella','condy',\
-	'condylostoma','none','eup','euplotes','peritrich','vorticella','ciliate','universal',\
-	'taa','tag','tga']
-
-	supported_gcodes_list = ['Blepharisma\t(TGA = W)','Chilodonella\t(TAG/TGA = Q)','Ciliate\t\t(TAR = Q)',\
-	'Condylostoma\t(TAR = Q, TGA = W)','Euplotes\t(TGA = C)','Peritrich\t(TAR = E)','None\t\t(TGA/TAG/TAA = X)',\
-	'Universal\t(TGA/TAG/TAA = STOP)','TAA\t\t(TAG/TGA = Q)', 'TAG\t\t(TRA = Q)', 'TGA\t\t(TAR = Q)']
-
-	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
-	' maurerax@gmail.com\n\n'+color.END)
-
-
-	if args.genetic_code is not None and args.genetic_code.lower() not in supported_gcodes_names:
-		print (color.BOLD+color.RED+'\nProvided genetic code is currently unsupported.\n\n'\
-		'If you have a new genetic code, please contact the author (with some evidence).\n\n'\
-		'Otherwise, use one of the currently supported genetic codes.\n'+color.END)
-		print (color.BOLD+color.ORANGE+'\n'.join(supported_gcodes_list)+'\n\n'+color.END)
-		print (author)
-		valid_arg += 1
-	else:
-		if args.list_codes:
-			print (color.BOLD+color.RED+'\nThese are the currently supported genetic codes.\n'+color.END)
-			print (color.BOLD+color.ORANGE+'\n'.join(supported_gcodes_list)+'\n\n'+color.END)
-			valid_arg += 1
-
-		if args.author:
-			print (author)
-			valid_arg += 1
-
-	if args.input_file is not None:
-		fasta_name = args.input_file.split('/')[-1]
-		# A bare file name has no directory part; list the current directory
-		# instead of calling os.listdir(''), which raises.
-		fasta_folder = os.path.dirname(args.input_file) or '.'
-
-		# The directory listing is only consulted once isfile() has succeeded.
-		if not os.path.isfile(args.input_file) or fasta_name not in os.listdir(fasta_folder):
-			print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
-			'('+color.DARKCYAN+fasta_name+color.END+color.BOLD+')\ndoes not'\
-			' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
-			valid_arg += 1
-
-	return valid_arg
-	
-
-##########################################################################################
-###------------------ Translates CDSs from the Provided Genetic Code ------------------###
-##########################################################################################
-	
-def translate_seqs(args):
-	"""Translate every CDS in ``args.input_file`` with the requested genetic code.
-
-	Returns:
-		A list of Fasta-formatted strings ('>description\\nPROTEIN\\n'). Trailing
-		'*' characters are stripped from each translation and any remaining
-		'*' is written as 'X'.
-
-	Raises:
-		ValueError: the genetic code has no translation table wired in here.
-	"""
-
-	inFasta = [i for i in SeqIO.parse(args.input_file,'fasta')]
-
-	print (color.BOLD+'\n\n\nTranslating: '+color.CYAN+args.input_file.split('/')[-1]+color.END+\
-	color.BOLD+'\nwith the '+color.GREEN+args.genetic_code.upper()+' Genetic Code\n'+color.END)
-
-	gcode = args.genetic_code.lower()
-
-	# Only the matching branch is evaluated, so a table name is looked up only
-	# when its code is requested.
-	# NOTE(review): the *_table names are not defined in this function and are
-	# presumably module-level codon tables -- confirm.
-	if gcode in ('ciliate', 'tga'):
-		table = 6
-	elif gcode in ('peritrich', 'vorticella'):
-		table = peritrich_table
-	elif gcode == 'tag':
-		table = tag_table
-	elif gcode in ('chilo', 'chilodonella', 'taa'):
-		table = c_uncinata_table
-	elif gcode in ('bleph', 'blepharisma'):
-		table = blepharisma_table
-	elif gcode in ('eup', 'euplotes'):
-		table = euplotes_table
-	elif gcode == 'universal':
-		table = 1
-	else:
-		# Previously such a code fell through every branch and the function
-		# died with an UnboundLocalError on return.
-		raise ValueError('No translation table is available for genetic code: '+args.genetic_code)
-
-	translated_seqs = ['>'+seq_rec.description+'\n'+str(seq_rec.seq.translate(table=table)).rstrip('*').replace('*','X')+'\n' for seq_rec in inFasta]
-
-	return translated_seqs
-
-
-##########################################################################################
-###---------------------------- Writes Out Translated CDSs ----------------------------###
-##########################################################################################
-
-def write_out(args):
-	"""Translate the CDSs and write those longer than 10 amino acids to ``args.out_name``."""
-
-	min_residues = 10
-
-	kept_entries = []
-	for entry in translate_seqs(args):
-		# Each entry is '>name\nPROTEIN\n'; the second line is the protein itself
-		protein = entry.split('\n')[1]
-		if len(protein) > min_residues:
-			kept_entries.append(entry)
-
-	summary = color.BOLD+'\nTranslated '+color.ORANGE+str(len(kept_entries))+color.END
-	summary += color.BOLD+' seqeunces using the '+color.GREEN+args.genetic_code.upper()+' Genetic Code\n\n'+color.END
-	print (summary)
-
-	with open(args.out_name,'w+') as w:
-		w.write(''.join(kept_entries))
-
-
-##########################################################################################
-###--------------------- Cleans up the Folder and Moves Final Files -------------------###
-##########################################################################################
-
-def clean_up(args):
-	"""Rename the input nucleotide Fasta file to ``args.new_ntd_name``.
-
-	The move is done with shutil instead of a shell-built 'mv' command, so
-	paths containing spaces or shell metacharacters are handled and a failed
-	move raises instead of passing silently.
-	"""
-	import shutil
-
-	shutil.move(args.input_file, args.new_ntd_name)
-	
-	
-##########################################################################################
-###----------------------------- Calls on Above Functions -----------------------------###
-##########################################################################################
-
-def main():
-	"""Parse the arguments, write the translations, rename the input, then name the next script."""
-
-	args = check_args()
-
-	for step in (write_out, clean_up):
-		step(args)
-
-	next_script_msg = color.BOLD+'Next Script is: '+color.PURPLE+' 4g_CountOgsUsearch.py\n\n'+color.END
-	print (next_script_msg)
-
-main()
--- a/PTL1/Genomes/Scripts/4_CountOGsDiamond.py
+++ b/PTL1/Genomes/Scripts/4_CountOGsDiamond.py
@ -1,301 +0,0 @@
-#!/usr/bin/env python3.5
-
-##__Updated__: 19_09_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 4_CountOGsDiamond.py --help
-
-##############################################################################
-##                                                                          ##
-## This script categorizes TRANSLATED CDSs into Homologous Gene Families    ##
-##                                                                          ##
-##     Questions about Gene Family Binning/Source? SEE NOTES at Bottom!     ##
-##                                                                          ##
-##      E-mail Xyrus (author) for help if needed: maurerax@gmail.com        ##
-##                                                                          ##
-##############################################################################
-
-import argparse, os, re, sys
-from argparse import RawTextHelpFormatter, SUPPRESS
-from distutils import spawn
-from Bio import SeqIO
-
-
-#----------------------------- Colors For Print Statements ------------------------------#
-class color:
-	# ANSI escape sequences used to style the text this script prints.
-
-	# Text attributes
-	BOLD = '\033[1m'
-	UNDERLINE = '\033[4m'
-	END = '\033[0m'
-
-	# Foreground colours
-	RED = '\033[91m'
-	GREEN = '\033[92m'
-	YELLOW = '\033[93m'
-	BLUE = '\033[94m'
-	PURPLE = '\033[95m'
-	CYAN = '\033[96m'
-	DARKCYAN = '\033[36m'
-	ORANGE = '\033[38;5;214m'
-   
-
-#------------------------------ UPDATE DIAMOND PATH BELOW! -------------------------------#
-def check_diamond_path():
-	"""Return the path to the "diamond" executable, or print instructions and exit if none is found."""
-	### IF Diamond is IN YOUR PATH then no updating is needed...
-	### Otherwise write the path between the quotes, e.g. '/path/to/diamond'
-	diamond_path = ''
-
-	if diamond_path == '':
-		# Nothing hard-coded above, so look the executable up
-		diamond_path = spawn.find_executable("diamond")
-
-	if diamond_path is not None:
-		return diamond_path
-
-	how_to_fix = color.BOLD + '\n\nPlease open this script and check that you have included'
-	how_to_fix += ' the PATH to the' + color.BLUE + ' "diamond" '+ color.END + color.BOLD
-	how_to_fix += 'executable.\n\n' + color.END
-	print (how_to_fix)
-
-	where_to_look = color.BOLD + color.BLUE + 'LOOK FOR:\n\n' + color.RED
-	where_to_look += '#------------------------------ UPDATE DIAMOND PATH BELOW! -------------------------------#'
-	where_to_look += color.BLUE + '\n\nThis is somewhere around lines 55 - 80...\n\n' + color.END
-	print (where_to_look)
-
-	sys.exit()
-
-#------------------------------- Main Functions of Script --------------------------------#
-
-###########################################################################################
-###--------------------- Parses and Checks Command-Line Arguments ----------------------###
-###########################################################################################
-
-def check_args():
-	"""Parse the command line, validate it, and attach the derived paths.
-
-	Prints the description and exits when no arguments are given, and exits
-	when return_more_info() reports a problem. Otherwise returns the parsed
-	namespace with these extra attributes: ``diamond`` (executable path),
-	``home_folder`` (folder of the input file, with a trailing '/') and the
-	``tsv_out`` / ``aa_out`` / ``ntd_out`` output paths.
-	"""
-
-	parser = argparse.ArgumentParser(description=
-	color.BOLD + '\n\nThis script will categorize Contigs into'+color.ORANGE+' "Homologous" '\
-	+color.END+color.BOLD+'Gene Families (OGs)\nbased on '+color.RED+'OrthoMCL'+color.END\
-	+color.BOLD+"'s Gene Family Grouping\n\n\nNotes on this script and "+color.GREEN+\
-	'OrthoMCL Families'+color.END+color.BOLD+' can be found\nat the bottom of '+color.GREEN\
-	+'THIS script (4_CountOGsDiamond.py)\n'+color.END+usage_msg(), usage=SUPPRESS,
-	formatter_class=RawTextHelpFormatter)
-
-	required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)
-
-	required_arg_group.add_argument('--input_file','-in', action='store',
-	help=color.BOLD+color.GREEN+'Fasta file of Nucleotide sequences enriched \nwith'\
-	' Eukaryotic protein coding transcripts'+color.END)
-	required_arg_group.add_argument('--databases','-d', action='store',
-	help=color.BOLD+color.GREEN+'Path to folder containing db_OG'+color.END)
-	required_arg_group.add_argument('--evalue','-e', action='store',
-	help=color.BOLD+color.GREEN+'Maximum OG assignment e-value'+color.END)
-
-	optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
-
-	optional_arg_group.add_argument('--threads','-t', default='2',
-	help=color.BOLD+color.GREEN+' Number of threads to use for BLAST\n (default = 2)\n'+color.END)
-
-	optional_arg_group.add_argument('-author', action='store_true',
-	help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
-
-	if len(sys.argv[1:]) == 0:
-		print (parser.description)
-		print ('\n')
-		sys.exit()
-
-	args = parser.parse_args()
-
-	quit_eval = return_more_info(args)
-	if quit_eval > 0:
-		sys.exit()
-
-	args.diamond = check_diamond_path()
-
-	path_parts = args.input_file.split('/')
-	fasta_name = path_parts[-1]
-
-	# A bare file name has no directory part. Joining nothing and appending '/'
-	# used to yield '/', which pointed every output at the filesystem root;
-	# use the current directory instead.
-	parent_folder = '/'.join(path_parts[:-1]) if len(path_parts) > 1 else '.'
-	args.home_folder = parent_folder + '/'
-
-	renamed = fasta_name.replace('CDS','CDS.Renamed')
-
-	args.tsv_out = args.home_folder + renamed.replace('.AA.fasta','_allOGCleanresults.tsv')
-
-	args.aa_out = args.home_folder + renamed
-	args.ntd_out = args.home_folder + renamed.replace('AA','NTD')
-
-	return args
-
-		
-###########################################################################################
-###------------------------------- Script Usage Message --------------------------------###
-###########################################################################################
-
-def usage_msg():
-	"""Return the coloured 'Example usage' text that is appended to the parser description."""
-	example_command = ''.join([
-		' python 4_CountOGsDiamond.py',
-		' --input_file ../Stentor_coeruleus.WGS.CDS.Prep/Stentor_coeruleus.WGS.CDS.Universal.AA.fasta',
-	])
-	heading = color.BOLD + color.RED + '\n\nExample usage:'
-	return heading + color.CYAN + example_command + color.END
-
-
-##########################################################################################
-###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
-##########################################################################################
-
-def return_more_info(args):
-	"""Print the author information or argument errors and count them.
-
-	Checks that the input Fasta file exists and ends with 'AA.fasta', and that
-	``<databases>/db_OG/OGSout.dmnd`` can be found.
-
-	Returns:
-		The number of messages printed; check_args() exits when it is > 0.
-	"""
-
-	valid_arg = 0
-
-	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
-	' maurerax@gmail.com\n\n'+color.END)
-
-	if args.author:
-		print (author)
-		valid_arg += 1
-
-	if args.input_file is not None:
-		fasta_name = args.input_file.split('/')[-1]
-		# A bare file name has no directory part; list the current directory
-		# instead of calling os.listdir(''), which raises.
-		fasta_folder = os.path.dirname(args.input_file) or '.'
-
-		# The directory listing is only consulted once isfile() has succeeded.
-		if not os.path.isfile(args.input_file) or fasta_name not in os.listdir(fasta_folder):
-			print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
-			'('+color.DARKCYAN+fasta_name+color.END+color.BOLD+')\ndoes not'\
-			' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
-			valid_arg += 1
-		elif not args.input_file.endswith('AA.fasta'):
-			print (color.BOLD+'\n\nInvalid Fasta File! Only Fasta Files that were processed'\
-			' with '+color.GREEN+'3g_GCodeTranslate.py '+color.END+color.BOLD+'are valid\n\n'\
-			'However, to bypass that issue, Fasta Files MUST end with '+color.CYAN+\
-			'"AA.fasta"\n\n'+color.END)
-			valid_arg += 1
-
-	# An omitted --databases is reported like a missing folder; concatenating
-	# None with a string used to raise a TypeError here.
-	if args.databases is None or not os.path.isdir(args.databases + '/db_OG'):
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\
-		+color.ORANGE+'db_OG Folder!\n\n'+color.END+color.BOLD+'Ensure that this folder '\
-		'can be found in the main '+color.ORANGE+'Databases Folder'+color.END+color.BOLD\
-		+'\n\nThen try once again\n\n.'+color.END)
-		valid_arg += 1
-
-	elif not os.path.isfile(args.databases + '/db_OG/OGSout.dmnd'):
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\
-		'Diamond formatted '+color.ORANGE+'Gene Family databases!\n\n'+color.END+color.BOLD+\
-		'Ensure that they can be found in the '+color.ORANGE+'db_OG folder'+color.END+\
-		color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+\
-		color.END+color.BOLD+'\n\nThen try once again.\n\n'+color.END)
-		valid_arg += 1
-
-	return valid_arg
-
-
-###########################################################################################
-###--------------------------- Does the Inital Folder Prep -----------------------------###
-###########################################################################################
-
-def prep_folders(args):
-	"""Create the 'DiamondOG' folder next to the input Fasta file if it is missing.
-
-	A bare file name is treated as living in the current directory; it used to
-	resolve to '/DiamondOG/' at the filesystem root. The folder is created
-	without a shell, so paths containing spaces work.
-	"""
-
-	OG_folder = os.path.join(os.path.dirname(args.input_file) or '.', 'DiamondOG')
-
-	os.makedirs(OG_folder, exist_ok=True)
-
-		
-###########################################################################################
-###--------------------- Runs Diamond on Split OrthoMCL Databases ----------------------###
-###########################################################################################
-
-def OG_ublast(args):
-	"""Run 'diamond blastp' with the input proteins against ``<databases>/db_OG/OGSout.dmnd``.
-
-	The tabular result ('--outfmt 6') is written next to the input file as
-	'<input up to the first ".fas">_allOGresults'. The command is passed as an
-	argument list rather than a shell string, so paths with spaces or shell
-	metacharacters reach diamond unchanged. As before, diamond's exit status
-	is not inspected.
-	"""
-	import subprocess
-
-	hits_out = args.input_file.split('.fas')[0] + '_allOGresults'
-
-	# NOTE(review): '--subject-cover 0.5' is kept as written; confirm against the
-	# DIAMOND manual whether this option expects a percentage or a fraction.
-	OG_diamond_cmd = [
-		args.diamond, 'blastp',
-		'-q', args.input_file,
-		'-d', args.databases + '/db_OG/OGSout.dmnd',
-		'--evalue', args.evalue,
-		'--subject-cover', '0.5',
-		'--threads', args.threads,
-		'--outfmt', '6',
-		'-o', hits_out,
-	]
-	subprocess.call(OG_diamond_cmd)
-
-
-###########################################################################################
-###--------------- Keeps the Single BEST Hit (HSP-score) Per Transcript ----------------###
-###########################################################################################
-
-def keep_best(args):
-	"""Keep one hit per query from the raw Diamond results and write ``args.tsv_out``.
-
-	Rows are ordered by the last column, highest first (presumably the bit
-	score of the tabular output -- confirm), and the first row seen for each
-	query name is kept. Each kept query name gets the last two '_'-separated
-	pieces of its hit name appended.
-	"""
-	print (color.BOLD+color.PURPLE+'\n\nProcessing OG-database results to keep only the BEST'\
-	'\nmatch for each transcript\n\n'+color.END)
-
-	with open(args.input_file.split('.fas')[0]+'_allOGresults') as raw_hits:
-		inTSV = [i for i in raw_hits.read().split('\n') if i != '']
-
-	inTSV.sort(key = lambda x: -float(x.split('\t')[-1]))
-
-	# Compare exact query names. The previous substring test against whole
-	# kept rows dropped e.g. 'seq1' once 'seq10' was kept, and was quadratic.
-	seen_queries = set()
-	keep = []
-	for i in inTSV:
-		query = i.split('\t')[0]
-		if query not in seen_queries:
-			seen_queries.add(query)
-			keep.append(i)
-
-	# One row per query already, so no set() is needed and the output order
-	# (best score first) is the same on every run.
-	updated_lines = []
-	for line in keep:
-		fields = line.split('\t')
-		updated_lines.append(fields[0]+'_'+'_'.join(fields[1].split('_')[-2:])+\
-		'\t'+'\t'.join(fields[1:])+'\n')
-
-	with open(args.tsv_out, 'w+') as w:
-		for i in updated_lines:
-			# Every row is followed by a blank line, exactly as before
-			w.write(i+'\n')
-	
-
-###########################################################################################
-###-------- Copies and Updates Names of Transcripts With OG Hits to New Fasta ----------###
-###########################################################################################
-
-def update_fasta(args):
-	"""Write renamed protein and nucleotide Fasta files for the sequences with an OG hit.
-
-	Reads ``args.tsv_out``, maps each sequence name (the text before '_OG5')
-	to '<name>_OG5_<last "_" field of the hit>', and writes the sequences whose
-	description is in that map to ``args.aa_out`` and ``args.ntd_out``. The
-	nucleotide input is the protein input path with '.AA.' replaced by '.NTD.'.
-	"""
-
-	print (color.BOLD+color.PURPLE+'Updating Sequence Names with their BEST OG hits\n\n'+color.END)
-
-	# Read through a context manager so the handle is closed promptly
-	with open(args.tsv_out) as best_hits:
-		keep = [i for i in best_hits.read().split('\n') if i != '']
-
-	keep_dict = {}
-	for line in keep:
-		fields = line.split('\t')
-		if 'OG5' in fields[1]:
-			seq_name = fields[0].split('_OG5')[0]
-			keep_dict[seq_name] = seq_name+'_OG5_'+fields[1].split('_')[-1]
-
-	protFasta = [seq_rec for seq_rec in SeqIO.parse(args.input_file,'fasta')]
-
-	ntdFasta = [seq_rec for seq_rec in SeqIO.parse(args.input_file.replace('.AA.','.NTD.'),'fasta')]
-
-	updated_prot_name = ['>'+keep_dict[i.description]+'\n'+str(i.seq).rstrip('*')+'\n' for i in protFasta if i.description in keep_dict]
-	updated_ntd_name = ['>'+keep_dict[i.description]+'\n'+str(i.seq).rstrip('*')+'\n' for i in ntdFasta if i.description in keep_dict]
-
-	with open(args.aa_out,'w+') as w:
-		w.write(''.join(updated_prot_name))
-
-	with open(args.ntd_out,'w+') as x:
-		x.write(''.join(updated_ntd_name))
-
-
-##########################################################################################
-###--------------------- Cleans up the Folder and Moves Final Files -------------------###
-##########################################################################################
-
-def clean_up(args):
-	"""Move the raw Diamond hits into the 'DiamondOG' folder and copy the final files there.
-
-	The raw hits file is located with the same '<input up to the first
-	".fas">_allOGresults' rule used when it is written. Files are moved and
-	copied with shutil rather than shell commands, so paths with spaces work
-	and a failure raises instead of passing silently.
-	"""
-	import shutil
-
-	og_folder = args.home_folder + '/DiamondOG'
-
-	raw_hits = args.input_file.split('.fas')[0] + '_allOGresults'
-	# Name the destination file explicitly so a leftover copy from an earlier
-	# run is replaced, as 'mv' did.
-	shutil.move(raw_hits, os.path.join(og_folder, os.path.basename(raw_hits)))
-
-	for finished_file in (args.aa_out, args.ntd_out, args.tsv_out):
-		shutil.copy(finished_file, og_folder + '/')
-
-
-##########################################################################################
-###----------------------------- Calls on Above Functions -----------------------------###
-##########################################################################################	
-				
-def main():
-	"""Parse the arguments, run every processing step in order, then name the next script."""
-
-	args = check_args()
-
-	pipeline = (prep_folders, OG_ublast, keep_best, update_fasta, clean_up)
-	for stage in pipeline:
-		stage(args)
-
-	next_script_msg = color.BOLD+'Next Script is: '+color.GREEN+'5g_FinalizeName.py\n\n'+color.END
-	print (next_script_msg)
-
-main()
-
-#----------------------------------------- NOTES -----------------------------------------#
-#
-# This script uses a "BLAST"-based approach to identify ANCIENT homologous gene families.
-#
-# Gene family designations were taken from OrthoMCL.org and serve as the database for 
-# this script's gene family assignments. These gene family assignments are NON-EXHAUSTIVE
-# and most Lineage-Specific families will be missed!
-#
-# If you have any questions contact Xyrus (author): maurerax@gmail.com
--- a/PTL1/Genomes/Scripts/5_FinalizeName.py
+++ b/PTL1/Genomes/Scripts/5_FinalizeName.py
@ -1,374 +0,0 @@
-#!/usr/bin/env python3.5
-
-##__Updated__: 20_09_2017
-##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
-##__Usage__: python 5g_FinalizeName.py --help
-
-##################################################################################################
-## This script is intended to rename the outputs of the FilterPartials script					##
-## with a given 10-character code that is used in the Katz lab Phylogenomic Tree building methods	##
-##																								##
-## Prior to running this script, ensure the following:											##
-##																								##
-## 1. You have assembled your transcriptome and COPIED the 'assembly' file 						##
-##    (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder							##
-## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py			##
-## 3. Removed SSU/LSU sequences from your Fasta File											##
-## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined				##
-## 5. Classified the Non-Strongly Prokaryotic sequences into OGs 								##
-## 6. You either know (or have inferred) the genetic code of the organism						##
-## 7. You have translated the sequences and checked for the data in the RemovePartials folder	##
-## 8. Partial sequences have been removed from the transcriptomic data sets						##
-##																								##
-## 										COMMAND Example Below									##
-##									Extra Notes at Bottom of Script								##
-##																								##
-## 					E-mail Xyrus (author) for help if needed: maurerax@gmail.com				##
-##																								##
-##										Next Script(s) to Run: 									##
-##                                     NONE! You're FINISHED! :D                                ##
-##																								##
-##################################################################################################
-
-import argparse, os, sys
-from argparse import RawTextHelpFormatter,SUPPRESS
-
-#----------------------- Solely to Make Print Statements Colorful -----------------------#
-
-class color:
-	# ANSI escape sequences used to style the text this script prints.
-
-	# Text attributes
-	BOLD = '\033[1m'
-	UNDERLINE = '\033[4m'
-	END = '\033[0m'
-
-	# Foreground colours
-	RED = '\033[91m'
-	GREEN = '\033[92m'
-	YELLOW = '\033[93m'
-	BLUE = '\033[94m'
-	PURPLE = '\033[95m'
-	CYAN = '\033[96m'
-	DARKCYAN = '\033[36m'
-	ORANGE = '\033[38;5;214m'
-
-
-#------------------------------- Main Functions of Script --------------------------------#
-
-###########################################################################################
-###--------------------- Parses and Checks Command-Line Arguments ----------------------###
-###########################################################################################
-
-def check_args():
-	"""Parse the command line, validate it, and attach the derived output locations.
-
-	Prints the description and exits when no arguments are given, exits when
-	return_more_info() reports a problem, and lets check_code() validate the
-	10-character name before the namespace is returned.
-	"""
-
-	description = color.BOLD + '\n\nThis script is intended to '+color.RED+'Rename '+color.END
-	description += color.BOLD+'the core set of '+color.PURPLE+'ORFS\n'+color.END+color.BOLD+'with a valid '
-	description += color.RED+'10-character code'+color.END+color.BOLD+' for use in the KatzLab\nPhylogenomic Pipeline'
-	description += usage_msg()
-
-	parser = argparse.ArgumentParser(description=description, usage=SUPPRESS,
-	formatter_class=RawTextHelpFormatter)
-
-	required = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)
-	required.add_argument('--input_file','-in', action='store',
-	help=color.BOLD+color.GREEN+' One of the Fasta files that is to be renamed\n'+color.END)
-	required.add_argument('--name','-n', action='store',
-	help=color.BOLD+color.GREEN+' A valid 10-Character code for updating the data\n'+color.END)
-
-	optional = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
-	optional.add_argument('-author', action='store_true',
-	help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
-
-	if not sys.argv[1:]:
-		print (parser.description)
-		print ('\n')
-		sys.exit()
-
-	args = parser.parse_args()
-
-	# return_more_info() also fills in args.input_AA / input_NTD / input_TSV
-	if return_more_info(args) > 0:
-		print ('\n')
-		sys.exit()
-
-	# NOTE(review): this drops the last three path components, while the example
-	# usage is only two folders deep -- confirm the intended folder layout.
-	args.all_output_folder = '/'.join(args.input_file.split('/')[:-3])
-
-	ready_to_go = args.all_output_folder + '/ReadyToGo/'
-	args.r2g_aa = ready_to_go + 'ReadyToGo_AA/'
-	args.r2g_ntd = ready_to_go + 'ReadyToGo_NTD/'
-	args.r2g_tsv = ready_to_go + 'ReadyToGo_TSV/'
-	args.r2g_xml = ready_to_go + 'ReadyToGo_XML/'
-
-	protein_file_name = args.input_AA.split('/')[-1]
-	args.xml_out = protein_file_name+'_1e-10keepall_BlastOutall.oneHit'
-
-	check_code(args)
-
-	return args
-
-
-###########################################################################################
-###------------------------------- Script Usage Message --------------------------------###
-###########################################################################################
-
-def usage_msg():
-	"""Return the coloured 'Example usage' text that is appended to the parser description."""
-	example_command = ''.join([
-		' python 5g_FinalizeName.py',
-		' --input_file ../Stentor_coeruleus.WGS.CDS.Prep/Stentor_coeruleus.WGS.CDS.Renamed.Universal.AA.fasta',
-		' --name Sr_ci_Scer',
-	])
-	heading = color.BOLD + color.RED + '\n\nExample usage:'
-	return heading + color.CYAN + example_command + color.END
-
-
-##########################################################################################
-###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
-##########################################################################################
-
-def return_more_info(args):
-	"""Derive the companion file names and report anything that prevents a run.
-
-	From ``args.input_file`` (which must end with 'AA.fasta' or 'NTD.fasta')
-	this sets ``args.input_AA``, ``args.input_NTD`` and ``args.input_TSV``,
-	then checks that all three files exist.
-
-	Returns:
-		The number of messages printed; check_args() exits when it is > 0.
-	"""
-
-	valid_args = 0
-
-	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
-	' maurerax@gmail.com\n\n'+color.END)
-
-	if args.author:
-		print (author)
-		valid_args += 1
-
-	input_file = args.input_file
-
-	if input_file is not None and input_file.endswith('AA.fasta'):
-		args.input_NTD = input_file.replace('AA.fasta','NTD.fasta')
-		args.input_AA = input_file
-		args.input_TSV = input_file.replace('.AA.fasta','_allOGCleanresults.tsv')
-
-	elif input_file is not None and input_file.endswith('NTD.fasta'):
-		args.input_NTD = input_file
-		args.input_AA = input_file.replace('NTD.fasta','AA.fasta')
-		args.input_TSV = input_file.replace('.NTD.fasta','_allOGCleanresults.tsv')
-
-	else:
-		# Without one of the two suffixes the companion names cannot be derived.
-		# This used to surface as an AttributeError further down.
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The input Fasta file '\
-		'('+color.DARKCYAN+str(input_file).split('/')[-1]+color.END+color.BOLD+')\nmust be given'\
-		' and must end with "AA.fasta" or "NTD.fasta".\n\nDouble-check then try again!\n\n'+color.END)
-		return valid_args + 1
-
-	if not os.path.isfile(args.input_NTD):
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Nucleotide '\
-		'Fasta file ('+color.DARKCYAN+args.input_NTD.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-		' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
-		valid_args += 1
-
-	if not os.path.isfile(args.input_AA):
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Protein '\
-		'Fasta file ('+color.DARKCYAN+args.input_AA.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-		' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
-		valid_args += 1
-
-	if not os.path.isfile(args.input_TSV):
-		# This message used to call the TSV a "Nucleotide Fasta file"
-		print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The matching OG-results '\
-		'TSV file ('+color.DARKCYAN+args.input_TSV.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\
-		' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
-		valid_args += 1
-
-	return valid_args
-
-###########################################################################################
-###-------------------- Double Checks Format for 10-Character Code ---------------------###
-###########################################################################################
-
-def check_code(args):
-	"""Exit unless ``args.name`` is 10 characters shaped like 'Xx_xx_Xxxx' (2, 2 and 4 characters joined by '_')."""
-
-	name_parts = args.name.split('_')
-
-	def reject(headline):
-		# Print the reason followed by three valid examples, then stop the script
-		print (color.BOLD+headline)
-		print ('Three examples below:\n'+color.CYAN+'\n\tSr_ci_Cunc\n\n\tOp_me_Hsap\n\n\t'\
-		'Am_ar_Ehis\n\n'+color.END)
-		sys.exit()
-
-	if len(args.name) != 10:
-		reject('\n\nNew Species Prefix is not 10 characters long\n\n')
-
-	if args.name.count('_') != 2:
-		reject('\n\nCheck the format of your Species Prefix!\n\n')
-
-	# Exactly two underscores were found above, so there are three parts here
-	if [len(part) for part in name_parts] != [2, 2, 4]:
-		reject('\n\nCheck the format of your Species Prefix!\n\n')
-
-	shown_name = args.input_file.split('/')[-1].split('_Filtered')[0]
-	print (color.BOLD+"\n\nRenaming "+color.ORANGE+shown_name+color.END+color.BOLD+\
-	"'s files\nusing the following 10-character code: "+color.CYAN+args.name+color.END+'\n')
-
-			
-##########################################################################################
-###------------------------- Creates Folders For Storing Data -------------------------###
-##########################################################################################
-
-def prep_folders(args):
-	"""Create the 'ReadyToGo' folder and its NTD / AA / TSV / XML sub-folders if missing.
-
-	The folders are created directly instead of through shell 'mkdir'
-	commands, so paths containing spaces work.
-	"""
-
-	wanted_folders = (
-		args.all_output_folder + '/ReadyToGo/',
-		args.r2g_ntd,
-		args.r2g_aa,
-		args.r2g_tsv,
-		args.r2g_xml,
-	)
-
-	for folder in wanted_folders:
-		os.makedirs(folder, exist_ok=True)
-
-
-###########################################################################################
-###----------- Renames the NTD and AA CDSs with the Given 10-Character Code ------------###
-###########################################################################################
-
-def rename_paralogs(args):
-	"""Prefix every sequence name with ``args.name`` and write the renamed files.
-
-	The protein Fasta, nucleotide Fasta and TSV are read from ``args.input_AA``,
-	``args.input_NTD`` and ``args.input_TSV``. The renamed copies keep their
-	file names and are written two folders above the protein file.
-	"""
-
-	# NOTE(review): for an input only one folder deep this evaluates to '/' --
-	# confirm the expected folder layout.
-	home_folder = '/'.join(args.input_AA.split('/')[:-2]) + '/'
-
-	print('HOME ' + home_folder)
-
-	name_prefix = args.name+'_'
-
-	print (color.BOLD+'\nRenaming Translated (Protein) '+color.PURPLE+'ORFs\n'+color.END)
-	with open(args.input_AA) as prot_in:
-		renamed_Final_Prots = prot_in.read().replace('>','>'+name_prefix)
-
-	print (color.BOLD+'\nRenaming Nucleotide '+color.PURPLE+'ORFs\n'+color.END)
-	with open(args.input_NTD) as ntd_in:
-		renamed_Final_Nucs = ntd_in.read().replace('>','>'+name_prefix)
-
-	print (color.BOLD+'\nUpdating CDS Names in the Spreadsheet'+color.END)
-	with open(args.input_TSV) as tsv_in:
-		tsv_rows = [row for row in tsv_in.read().split('\n') if row != '']
-
-	# Prefix every row. Rewriting only the text after a newline used to leave
-	# the first row with its old name. Blank separator lines are dropped, as
-	# they were before.
-	renamed_Final_tsv = '\n'.join(name_prefix+row for row in tsv_rows)
-
-	with open(home_folder + args.input_AA.split('/')[-1],'w+') as w:
-		w.write(renamed_Final_Prots)
-
-	with open(home_folder + args.input_NTD.split('/')[-1],'w+') as x:
-		x.write(renamed_Final_Nucs)
-
-	with open(home_folder + args.input_TSV.split('/')[-1],'w+') as y:
-		y.write(renamed_Final_tsv)
-
-
-###########################################################################################
-###--------------------------------- Header/Tail Lines ---------------------------------###
-###########################################################################################
-
-def header_tail():
-	"""Return the fixed opening and closing text of the BLAST-style XML report as (header, tail)."""
-	header_lines = (
-		'<?xml version="1.0"?>',
-		'<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
-		'<BlastOutput>',
-		'  <BlastOutput_program>blastp</BlastOutput_program>',
-		'  <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>',
-		'  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>',
-		'  <BlastOutput_db>../OGBlastDB/renamed_aa_seqs_OrthoMCL-5_12653.fasta</BlastOutput_db>',
-		'  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>',
-	)
-	# Every header line, including the last, ends with a newline
-	header = ''.join(entry + '\n' for entry in header_lines)
-
-	tail = '\n'.join(('</BlastOutput_iterations>', '</BlastOutput>'))
-
-	return header, tail
-
-
-###########################################################################################
-###------------------------------- TSV to XML Conversion -------------------------------###
-###########################################################################################
-
-def convert_TSV_data(args):
-	"""Build the fake BLAST-XML iteration records from the taxon's TSV file.
-
-	The TSV is looked up by the file name of ``args.input_TSV`` inside the
-	folder two levels above ``args.input_AA``. Blank lines are ignored. Every
-	remaining row is split on tabs and read positionally: column 0 is written
-	as the query definition, column 1 as the hit definition, and the last four
-	columns as start coordinate, end coordinate, e-value and score.
-
-	Args:
-		args: parsed command-line arguments; ``input_AA`` and ``input_TSV``
-			are read.
-
-	Returns:
-		A list with one XML string per TSV row. The first string additionally
-		begins with the query/parameter preamble and the opening
-		``<BlastOutput_iterations>`` tag.
-
-	Raises:
-		IndexError: if a row has too few tab-separated columns.
-		ValueError: if a coordinate column is not an integer.
-	"""
-
-	home_folder = '/'.join(args.input_AA.split('/')[:-2])
-
-	TSVforConvert = home_folder + '/' + args.input_TSV.split('/')[-1]
-
-	# The context manager closes the handle; the original left it open.
-	with open(TSVforConvert) as tsv:
-		inTSV = [line.rstrip('\n') for line in tsv if line != '\n']
-
-	iterations = []
-
-	for n, row in enumerate(inTSV):
-		# Split each row once instead of once per field.
-		fields = row.split('\t')
-		query_name, hit_name = fields[0], fields[1]
-		start, end, evalue, score = fields[-4], fields[-3], fields[-2], fields[-1]
-
-		# The span is reused for identity, positives and alignment length; the
-		# "+1" variant is reused for the query and hit lengths.
-		span = str(abs(int(end)-int(start)))
-		length = str(abs(int(end)-int(start)+1))
-		iter_num = str(n+1)
-
-		# The hit coordinates are copies of the query coordinates, and the bit
-		# score and statistics are fixed placeholder values.
-		iteration = '<Iteration>\n  <Iteration_iter-num>'+iter_num+'</Iteration_iter-num>\n  <Iteration_query-ID>Query_'+iter_num+'</Iteration_query-ID>\n'\
-			'  <Iteration_query-def>'+query_name+'</Iteration_query-def>\n  <Iteration_query-len>'+length+'</Iteration_query-len>\n'\
-			'<Iteration_hits>\n<Hit>\n  <Hit_num>1</Hit_num>\n  <Hit_id>Fake_Entry</Hit_id>\n  <Hit_def>'+hit_name+'</Hit_def>\n  <Hit_accession>Fake_Accession</Hit_accession>\n'\
-			'  <Hit_len>'+length+'</Hit_len>\n  <Hit_hsps>\n    <Hsp>\n      <Hsp_num>1</Hsp_num>\n      <Hsp_bit-score>1234</Hsp_bit-score>\n'\
-			'      <Hsp_score>'+score+'</Hsp_score>\n      <Hsp_evalue>'+evalue+'</Hsp_evalue>\n      <Hsp_query-from>'+start+'</Hsp_query-from>\n'\
-			'      <Hsp_query-to>'+end+'</Hsp_query-to>\n      <Hsp_hit-from>'+start+'</Hsp_hit-from>\n      <Hsp_hit-to>'+end+'</Hsp_hit-to>\n'\
-			'      <Hsp_query-frame>0</Hsp_query-frame>\n      <Hsp_hit-frame>0</Hsp_hit-frame>\n      <Hsp_identity>'+span+'</Hsp_identity>\n'\
-			'      <Hsp_positive>'+span+'</Hsp_positive>\n      <Hsp_gaps>0</Hsp_gaps>\n      <Hsp_align-len>'+span+'</Hsp_align-len>\n'\
-			'      <Hsp_qseq></Hsp_qseq>\n      <Hsp_hseq></Hsp_hseq>\n      <Hsp_midline></Hsp_midline>\n    </Hsp>\n  </Hit_hsps>\n</Hit>\n'\
-			'\n</Iteration_hits>\n  <Iteration_stat>\n    <Statistics>\n      <Statistics_db-num>379660</Statistics_db-num>\n      <Statistics_db-len>197499634</Statistics_db-len>\n'\
-			'      <Statistics_hsp-len>123</Statistics_hsp-len>\n      <Statistics_eff-space>184705217500</Statistics_eff-space>\n      <Statistics_kappa>0.041</Statistics_kappa>\n'\
-			'      <Statistics_lambda>0.267</Statistics_lambda>\n      <Statistics_entropy>0.14</Statistics_entropy>\n    </Statistics>\n  </Iteration_stat>\n</Iteration>\n'
-
-		if n == 0:
-			# Only the first record carries the document-level query and
-			# parameter section and opens the list of iterations.
-			preamble = '  <BlastOutput_query-def>'+query_name+'</BlastOutput_query-def>\n  <BlastOutput_query-len>'+length+'</BlastOutput_query-len>\n'\
-				'  <BlastOutput_param>\n    <Parameters>\n      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n      <Parameters_expect>1e-10</Parameters_expect>\n'\
-				'      <Parameters_gap-open>11</Parameters_gap-open>\n      <Parameters_gap-extend>1</Parameters_gap-extend>\n      <Parameters_filter>F</Parameters_filter>\n'\
-				'    </Parameters>\n  </BlastOutput_param>\n<BlastOutput_iterations>\n'
-			iteration = preamble + iteration
-
-		iterations.append(iteration)
-
-	return iterations
-
-
-###########################################################################################
-###--------------------------- Writes Out the Fake XML File ----------------------------###
-###########################################################################################
-
-def write_Fake_XML(args):
-	"""Write the fake BLAST-XML file assembled from the taxon's TSV data.
-
-	Prints a progress banner, then writes the header, all iteration records
-	and the tail, in that order, to ``args.xml_out`` inside the folder two
-	levels above ``args.input_AA``.
-
-	Args:
-		args: parsed command-line arguments; ``input_AA``, ``input_TSV``,
-			``name`` and ``xml_out`` are read here.
-	"""
-
-	target_dir = '/'.join(args.input_AA.split('/')[:-2]) + '/'
-
-	banner = ''.join([
-		color.BOLD, '\n\nConverting ', color.ORANGE, args.name, '_XX_',
-		args.input_TSV.split('/')[-1], color.END, color.BOLD,
-		' to XML format\n', color.END,
-	])
-	print(banner)
-
-	xml_head, xml_tail = header_tail()
-	xml_body = ''.join(convert_TSV_data(args))
-
-	with open(target_dir + args.xml_out, 'w+') as xml_file:
-		for section in (xml_head, xml_body, xml_tail):
-			xml_file.write(section)
-		
-##########################################################################################
-###-------------------- Cleans up the Folder and Moves Final Files --------------------###
-##########################################################################################
-def clean_up(args):
-	"""Delete the intermediate inputs and copy the final files to their destinations.
-
-	Removes ``args.input_AA``, ``args.input_NTD`` and ``args.input_TSV``, then
-	copies the renamed AA/NTD fasta files, the ``_allOGCleanresults.tsv`` file
-	and the ``oneHit`` file from the folder two levels above
-	``args.input_file`` to ``args.r2g_aa``, ``args.r2g_ntd``, ``args.r2g_tsv``
-	and ``args.r2g_xml`` respectively.
-
-	Args:
-		args: parsed command-line arguments providing the paths named above.
-	"""
-	# Imported locally so the block does not depend on the file's import section.
-	import shlex
-
-	final_folder = '/'.join(args.input_file.split('/')[:-2]) + '/'
-
-	# Every path is quoted so that spaces or shell metacharacters in a
-	# directory name cannot split or alter the command.
-	for leftover in (args.input_AA, args.input_NTD, args.input_TSV):
-		os.system('rm ' + shlex.quote(leftover))
-
-	# The wildcard stays outside the quotes: the shell still has to expand it.
-	copies = (
-		('*Renamed.*.AA.fasta', args.r2g_aa),
-		('*Renamed.*.NTD.fasta', args.r2g_ntd),
-		('*.Renamed.*_allOGCleanresults.tsv', args.r2g_tsv),
-		('*oneHit', args.r2g_xml),
-	)
-	for pattern, destination in copies:
-		os.system('cp ' + shlex.quote(final_folder) + pattern + ' ' + shlex.quote(destination))
-		
-###########################################################################################
-###-------------------------------- Next Script Message --------------------------------###
-###########################################################################################
-
-def next_script(args):
-	"""Print the closing message telling the user where the final files are.
-
-	Both names shown in the message are cut out of ``args.xml_out``: the text
-	before the first ``_XX``, and the text between the last ``_XX_`` and
-	``.Renamed`` with ``.Prep`` appended.
-
-	Args:
-		args: parsed command-line arguments; only ``xml_out`` is read.
-	"""
-
-	leading_name = args.xml_out.split('_XX')[0]
-	prep_name = args.xml_out.split('_XX_')[-1].split('.Renamed')[0] + '.Prep'
-
-	message = ''.join([
-		color.BOLD, '\nThere is no next script! The final ', color.ORANGE, leading_name,
-		color.END, color.BOLD, ' files can be\nfound in the ', color.RED, prep_name,
-		color.END, color.BOLD, ' and ', color.RED, 'ReadyToGo folders',
-		color.END, color.BOLD,
-		' and are ready\nfor the KatzLab Phylogenomic Tree-Building Steps!\n\n',
-		color.END,
-	])
-	print(message)
-
-##########################################################################################
-###--------------- Checks Command Line Arguments and Calls on Functions ---------------###
-##########################################################################################
-			
-def main():
-	"""Parse the command line and run every step of the script in order."""
-
-	args = check_args()
-
-	# Each step takes the parsed arguments as its only input; the order is
-	# the order of execution.
-	steps = (prep_folders, rename_paralogs, write_Fake_XML, clean_up, next_script)
-	for step in steps:
-		step(args)
-
-main()
--- a/PTL1/Genomes/Scripts/wrapper.py
+++ b/PTL1/Genomes/Scripts/wrapper.py
@ -1,170 +0,0 @@
-import os, sys, re
-import argparse
-
-
-def get_args():
-	"""Parse the wrapper's command-line options.
-
-	Script numbers are limited to 1-5 because the wrapper's dispatch table
-	defines exactly five scripts; ``--script``, ``--first_script`` and
-	``--last_script`` default to -1, meaning "not given".
-
-	Returns:
-		The ``argparse.Namespace`` with attributes ``script``,
-		``first_script``, ``last_script``, ``cds``, ``output``,
-		``xplate_contam``, ``genetic_code``, ``minlen`` and ``databases``.
-	"""
-
-	parser = argparse.ArgumentParser(
-		prog = 'PhyloToL v6.0 Part 1 for GenBank Genomes',
-		description = "Updated January 19th, 2023 by Auden Cote-L'Heureux. Link to GitHub: https://github.com/AudenCote/PhyloToL_v6.0"
-		)
-
-	# Lists (rather than sets) keep the choices in ascending order in --help.
-	parser.add_argument('-s', '--script', default = -1, type = int, choices = [1, 2, 3, 4, 5], help = 'Script to run if you are only running one script')
-	parser.add_argument('-1', '--first_script', default = -1, type = int, choices = [1, 2, 3, 4, 5], help = 'First script to run')
-	parser.add_argument('-2', '--last_script', default = -1, type = int, choices = [2, 3, 4, 5], help = 'Last script to run')
-	parser.add_argument('-c', '--cds', type = str, help = 'Path to a folder of nucleotide CDS. Each file name should start with a unique 10 digit code, and end in "_GenBankCDS.fasta", E.g. Op_me_hsap_GenBankCDS.fasta')
-	parser.add_argument('-o', '--output', default = '../', type = str, help = 'An "Output" folder will be created at this directory to contain all output files. By default this folder will be created at the parent directory of the Scripts folder')
-	parser.add_argument('-x', '--xplate_contam', action = 'store_true', help = 'Run cross-plate contamination removal (includes all files)')
-	# In this wrapper script 2 writes "gcode_output.tsv" and script 3 reads it.
-	parser.add_argument('-g', '--genetic_code', type = str, help = 'If all of your taxa use the same genetic code, you may enter it here (to be used in script 3). Otherwise, stop after script 2 and fill in "gcode_output.tsv" before running script 3')
-	parser.add_argument('-l', '--minlen', type = int, default = 200, help = 'Minimum CDS length')
-	parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder (which should contain db_OG)')
-
-	return parser.parse_args()
-
-
-def script_one(args, ten_digit_codes):
-	"""Run 1_RenameCDS.py on every accepted CDS file in ``args.cds``.
-
-	A file is processed when its name is a code from ``ten_digit_codes``
-	followed by exactly ``_GenBankCDS.fasta``.
-
-	Args:
-		args: parsed command-line arguments; ``cds`` and ``output`` are read.
-		ten_digit_codes: collection of accepted 10-character name prefixes.
-	"""
-
-	for fasta_name in os.listdir(args.cds):
-		prefix, remainder = fasta_name[:10], fasta_name[10:]
-		if remainder != '_GenBankCDS.fasta' or prefix not in ten_digit_codes:
-			continue
-
-		command = 'python 1_RenameCDS.py -in ' + args.cds + '/' + fasta_name + ' -s GenBank -o ' + args.output + '/Output'
-		os.system(command)
-
-
-def script_two(args):
-	"""Run 2_GCodeEval.py for every taxon and summarise the results in a TSV.
-
-	For each folder in ``<output>/Output`` that holds a
-	``<folder>_GenBankCDS.Prepped.fasta`` file, 2_GCodeEval.py is run on it.
-	Afterwards the value listed for TGA, TAG and TAA in each
-	``<folder>_GenBankCDS.Prepped.GeneticCode.txt`` is collected and written
-	to ``<output>/Output/gcode_output.tsv``, one row per taxon. The header
-	has a fifth "Genetic Code" column that the rows leave unfilled.
-
-	Args:
-		args: parsed command-line arguments; only ``output`` is read.
-	"""
-
-	output_dir = args.output + '/Output'
-
-	for folder in os.listdir(output_dir):
-		prepped = output_dir + '/' + folder + '/' + folder + '_GenBankCDS.Prepped.fasta'
-		if os.path.isfile(prepped):
-			os.system('python 2_GCodeEval.py --input_file ' + prepped)
-
-	# Column order of the summary; it must match the header written below.
-	stop_codons = ('TGA', 'TAG', 'TAA')
-
-	gcode_info = []
-	for folder in os.listdir(output_dir):
-		gcode_path = output_dir + '/' + folder + '/' + folder + '_GenBankCDS.Prepped.GeneticCode.txt'
-		if not os.path.isfile(gcode_path):
-			continue
-
-		# Key the values by codon so each lands under its own header no matter
-		# in which order the file lists them; the first occurrence wins.
-		densities = {}
-		with open(gcode_path) as f:
-			for line in f:
-				line_sep = line.strip().split('\t')
-				if line_sep[0] in stop_codons and len(line_sep) > 1:
-					densities.setdefault(line_sep[0], line_sep[1])
-
-		# A codon missing from the file yields an empty cell instead of a crash.
-		gcode_info.append([folder] + [densities.get(codon, '') for codon in stop_codons])
-
-	with open(output_dir + '/gcode_output.tsv', 'w') as g:
-		g.write('10 Digit Code\tIn-frame TGA Density\tIn-frame TAG Density\tIn-frame TAA Density\tGenetic Code\n')
-		for row in gcode_info:
-			g.write('\t'.join(row) + '\n')
-
-
-def script_three(args):
-	"""Run 3_GCodeTranslate.py for every taxon that has a prepped CDS file.
-
-	When ``args.genetic_code`` names a valid code, that code (lower-cased) is
-	used for every taxon. Otherwise the code is taken from the last column of
-	the taxon's own row in ``<output>/Output/gcode_output.tsv``; a taxon whose
-	row holds an unrecognised code is reported and skipped.
-
-	Args:
-		args: parsed command-line arguments; ``output`` and ``genetic_code``
-			are read.
-	"""
-
-	valid_codes = ['universal', 'blepharisma', 'chilodonella', 'condylostoma', 'euplotes', 'peritrich', 'vorticella', 'mesodinium', 'tag', 'tga', 'taa', 'none']
-
-	output_dir = args.output + '/Output'
-
-	if args.genetic_code is not None and args.genetic_code.lower() in valid_codes:
-		for folder in os.listdir(output_dir):
-			prepped = output_dir + '/' + folder + '/' + folder + '_GenBankCDS.Prepped.fasta'
-			if os.path.isfile(prepped):
-				os.system('python 3_GCodeTranslate.py -in ' + prepped + ' -g ' + args.genetic_code.lower())
-	else:
-		# Read the table once and close it; the original opened it twice and
-		# never used the second handle.
-		with open(output_dir + '/gcode_output.tsv', 'r') as g:
-			lines = [line.strip().split('\t') for line in g]
-
-		for folder in os.listdir(output_dir):
-			prepped = output_dir + '/' + folder + '/' + folder + '_GenBankCDS.Prepped.fasta'
-			if not os.path.isfile(prepped):
-				continue
-
-			for line in lines:
-				# Only the taxon's own row decides its fate; without this
-				# guard every folder was blamed for other taxa's bad codes.
-				if line[0] != folder:
-					continue
-
-				if line[-1].lower() in valid_codes:
-					os.system('python 3_GCodeTranslate.py -in ' + prepped + ' -g ' + line[-1])
-				elif line[-1] != 'Genetic Code':
-					print('\n' + line[-1] + ' is not a valid genetic code. Skipping taxon ' + folder + '.\n')
-
-
-def script_four(args):
-	"""Run 4_CountOGsDiamond.py on each taxon's translated protein file.
-
-	Every folder in ``<output>/Output`` that contains a
-	``<folder>_GenBankCDS.Universal.AA.fasta`` file is processed with
-	``-t 30``, ``--evalue 1e-15`` and the databases folder from ``args``.
-
-	Args:
-		args: parsed command-line arguments; ``output`` and ``databases``
-			are read.
-	"""
-
-	for taxon in os.listdir(args.output + '/Output'):
-		aa_fasta = args.output + '/Output/' + taxon + '/' + taxon + '_GenBankCDS.Universal.AA.fasta'
-		if not os.path.isfile(aa_fasta):
-			continue
-
-		command = 'python 4_CountOGsDiamond.py -in ' + aa_fasta + ' -t 30 --databases ' + args.databases + ' --evalue 1e-15'
-		os.system(command)
-	
-
-
-def script_five(args):
-	"""Run 5_FinalizeName.py for each taxon, then archive the intermediate output.
-
-	After the per-taxon runs, everything in ``<output>/Output`` except the
-	``ReadyToGo`` and ``Intermediate`` entries is moved into
-	``<output>/Output/Intermediate``.
-
-	Args:
-		args: parsed command-line arguments; only ``output`` is read.
-	"""
-
-	output_dir = args.output + '/Output'
-
-	for folder in os.listdir(output_dir):
-		# NOTE(review): the existence check looks in the taxon folder while the
-		# path handed to script 5 points into its DiamondOG subfolder - confirm
-		# that script 4 leaves the renamed file in both places.
-		if os.path.isfile(output_dir + '/' + folder + '/' + folder + '_GenBankCDS.Renamed.Universal.AA.fasta'):
-			step5_cmd = 'python 5_FinalizeName.py -in ' + output_dir + '/' + folder + '/DiamondOG/' + folder + '_GenBankCDS.Renamed.Universal.AA.fasta -n ' + folder
-			os.system(step5_cmd)
-
-	# exist_ok keeps a repeated run of this step from crashing on the folder
-	# created by an earlier run, after all the work above has already been done.
-	os.makedirs(output_dir + '/Intermediate', exist_ok = True)
-
-	for file in os.listdir(output_dir):
-		if file != 'ReadyToGo' and file != 'Intermediate':
-			os.system('mv ' + output_dir + '/' + file + ' ' + output_dir + '/Intermediate')
-
-
-
-if __name__ == "__main__":
-	# Entry point: validate the request, prepare the Output folder, then run
-	# either one script (--script) or a range (--first_script/--last_script).
-
-	args = get_args()
-
-	# Index 0 is a placeholder so that script numbers index the list directly.
-	scripts = [0, script_one, script_two, script_three, script_four, script_five]
-
-	# Validate the selection before touching the file system: otherwise a
-	# rejected invocation leaves an Output folder behind that blocks the next run.
-	# -1 means "not given", so the lower bound also rejects a range with only
-	# one end specified (which used to index scripts[-1] and scripts[0]).
-	if args.script == -1:
-		if not (1 <= args.first_script < args.last_script < len(scripts)):
-			print('\nInvalid script combination: the first script must be less than the last script. If you want to use only once script, use the --script argument.\n')
-			quit()
-	elif not (1 <= args.script < len(scripts)):
-		print('\nInvalid script number: --script must be between 1 and ' + str(len(scripts) - 1) + '.\n')
-		quit()
-
-	# The run starts from scratch when script 1 is requested, alone or as the
-	# first script of a range.
-	starts_at_one = args.first_script == 1 or args.script == 1
-	output_dir = args.output + '/Output'
-
-	# --cds has no default, so guard against None before asking the OS about it.
-	if starts_at_one and (args.cds is None or not os.path.isdir(args.cds)):
-		print('\nIf starting at the first script, a valid path to a folder of nucleotide CDS files (which must end in .fasta) should be input using the --cds argument')
-		quit()
-
-	ten_digit_codes = []
-	if starts_at_one:
-		for file in os.listdir(args.cds):
-			if file[10:] == '_GenBankCDS.fasta':
-				ten_digit_codes.append(file[:10])
-	elif not os.path.isdir(output_dir):
-		# Later scripts only read from Output; without it there is nothing to
-		# process, so stop instead of creating an empty folder and carrying on.
-		print('\nA folder called "Output" is not found at the given output path. Enter the correct path for --output or start from script 1.\n')
-		quit()
-
-	if len(ten_digit_codes) > len(set(ten_digit_codes)):
-		print('\nDuplicate 10-digit codes are not allowed. Aborting.\n')
-		quit()
-
-	# Positions 2 and 5 must be underscores; every other position must be an
-	# ASCII letter or digit.
-	allowed_chars = 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890'
-	for code in ten_digit_codes:
-		for c, char in enumerate(code):
-			is_separator = c == 2 or c == 5
-			if (not is_separator and char not in allowed_chars) or (is_separator and char != '_'):
-				print('\n' + code + ' is an invalid 10-digit code sample identifier. It must of the format Op_me_hsap (Homo sapiens for example). Please ask for help if this does not make sense.\n')
-				quit()
-
-	if os.path.isdir(output_dir) and starts_at_one:
-		print('\nAn "Output" folder already exists at the given path. Please delete or rename this folder and try again.\n')
-		quit()
-	elif not os.path.isdir(output_dir):
-		os.mkdir(output_dir)
-
-	if args.script == -1:
-		for number in range(args.first_script, args.last_script + 1):
-			print('\nRunning script ' + str(number) + '...\n')
-			# Only script one needs the list of validated sample codes.
-			if number == 1:
-				scripts[number](args, ten_digit_codes)
-			else:
-				scripts[number](args)
-	else:
-		if args.script == 1:
-			scripts[args.script](args, ten_digit_codes)
-		else:
-			scripts[args.script](args)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/PTL1/Genomes/Scripts/wrapper_submit.sh
+++ b/PTL1/Genomes/Scripts/wrapper_submit.sh
@ -1,23 +0,0 @@
-#!/bin/bash
-#
-# Slurm batch script: loads the modules listed below, extends PATH, and runs
-# wrapper.py from script 1 through script 5 on the CDS files in ../TestData.
-# Replace YOUREMAIL in the --mail-user line before submitting.
-#
-#SBATCH --job-name=PTL1_genome
-#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId)
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances
-#SBATCH --mem=160G
-#SBATCH --mail-type=ALL
-#SBATCH --mail-user=YOUREMAIL@smith.edu
-
-module purge       #Cleans up any loaded modules
-module use /gridapps/modules/all    #make sure module locations is loaded
-
-# Tool versions are pinned through the module names.
-module load slurm
-module load Biopython/1.75-foss-2019b-Python-3.7.4
-module load BLAST+
-module load DIAMOND/0.9.30-GCC-8.3.0
-
-# NOTE(review): wrapper.py as shown does not call RAxML or vsearch directly;
-# presumably these PATH entries are kept for other pipeline parts - confirm.
-export PATH=$PATH:/Users/katzlab/scratch/katzlab/grid_phylotol_setup/programs/standard-RAxML-master
-export PATH=$PATH:/Users/katzlab/scratch/katzlab/grid_vsearch_setup/vsearch-2.15.1-linux-x86_64/bin
-
-# -1/-2 select the first and last script; --genetic_code applies one code to all taxa.
-python wrapper.py -1 1 -2 5 --cds ../TestData --genetic_code Universal --databases ../Databases