mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 06:50:24 +08:00
Delete PTL1/Transcriptomes/Scripts/7_FinalizeName.py
This commit is contained in:
parent
5f28d4447f
commit
f9d747ce38
@ -1,398 +0,0 @@
|
||||
#!/usr/bin/env python3.5
|
||||
|
||||
##__Updated__: 31_08_2017
|
||||
##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
|
||||
##__Usage__: python 7_FinalizeName.py --help
|
||||
|
||||
##################################################################################################
|
||||
## This script is intended to rename the outputs of the FilterPartials script ##
|
||||
## to a given 10-character code that is used in the Katz lab Phylogenomic Tree building methods ##
|
||||
## ##
|
||||
## Prior to running this script, ensure the following: ##
|
||||
## ##
|
||||
## 1. You have assembled your transcriptome and COPIED the 'assembly' file ##
|
||||
## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ##
|
||||
## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ##
|
||||
## 3. Removed SSU/LSU sequences from your Fasta File ##
|
||||
## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined ##
|
||||
## 5. Classified the Non-Strongly Prokaryotic sequences into OGs ##
|
||||
## 6. You either know (or have inferred) the genetic code of the organism ##
|
||||
## 7. You have translated the sequences and checked for the data in the RemovePartials folder ##
|
||||
## 8. Partial sequences have been removed from the transcriptomic data sets ##
|
||||
## ##
|
||||
## COMMAND Example Below ##
|
||||
## Extra Notes at Bottom of Script ##
|
||||
## ##
|
||||
## E-mail Xyrus (author) for help if needed: maurerax@gmail.com ##
|
||||
## ##
|
||||
## Next Script(s) to Run: ##
|
||||
## NONE! You're FINISHED! :D ##
|
||||
## ##
|
||||
##################################################################################################
|
||||
|
||||
import argparse, os, sys
|
||||
from argparse import RawTextHelpFormatter,SUPPRESS
|
||||
|
||||
#----------------------- Solely to Make Print Statements Colorful -----------------------#
|
||||
|
||||
class color:
    """Terminal escape sequences used to colourise the script's messages.

    Every attribute is a plain string meant to be concatenated into the text
    passed to ``print``; ``END`` is appended to switch the styling off again.
    """

    # Prefix shared by every sequence below (ESC followed by '[').
    _CSI = '\033['

    # Foreground colours
    PURPLE = _CSI + '95m'
    CYAN = _CSI + '96m'
    DARKCYAN = _CSI + '36m'
    ORANGE = _CSI + '38;5;214m'
    BLUE = _CSI + '94m'
    GREEN = _CSI + '92m'
    YELLOW = _CSI + '93m'
    RED = _CSI + '91m'

    # Text attributes
    BOLD = _CSI + '1m'
    UNDERLINE = _CSI + '4m'

    # Reset
    END = _CSI + '0m'
|
||||
|
||||
|
||||
#------------------------------- Main Functions of Script --------------------------------#
|
||||
|
||||
###########################################################################################
|
||||
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
|
||||
###########################################################################################
|
||||
|
||||
def check_args():
    """Parse the command line, validate it and derive every output path.

    With no arguments at all the description (which embeds the usage example)
    is printed and the script exits.  Otherwise the arguments are parsed,
    ``return_more_info`` is asked to validate them (the script exits when it
    reports any problem) and the following attributes are added to the
    returned namespace:

    * ``all_output_folder`` -- the folder two levels above the input file
      (i.e. the parent of the folder that holds the input file).
    * ``out_XML``           -- file name of the fake BLAST XML output.
    * ``file_prefix``       -- input file name up to ``_Filtered.Final``
      (falls back to ``--name`` when that marker is absent).
    * ``r2g_aa`` / ``r2g_ntd`` / ``r2g_tsv`` / ``r2g_xml`` -- the
      ``ReadyToGo`` sub-folders.

    Returns:
        argparse.Namespace: the parsed and augmented arguments.
    """
    description = (
        color.BOLD + '\n\nThis script is intended to '+color.RED+'Rename '+color.END
        +color.BOLD+'the core set of '+color.PURPLE+'ORFS\n'+color.END+color.BOLD+'with a valid '
        +color.RED+'10-character code'+color.END+color.BOLD+' for use in the KatzLab\nPhylogenomic Pipeline'
        +usage_msg())

    parser = argparse.ArgumentParser(description=description, usage=SUPPRESS,
        formatter_class=RawTextHelpFormatter)

    required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

    required_arg_group.add_argument('--input_file','-in', action='store',
        help=color.BOLD+color.GREEN+' One of the Fasta files that is to be renamed\n'+color.END)
    required_arg_group.add_argument('--name','-n', action='store',
        help=color.BOLD+color.GREEN+' A valid 10-Character code for updating the data\n'+color.END)

    optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)

    optional_arg_group.add_argument('-author', action='store_true',
        help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)

    # No arguments at all: show the description/usage and stop.
    if not sys.argv[1:]:
        print (parser.description)
        print ('\n')
        sys.exit()

    args = parser.parse_args()

    # return_more_info() counts the reasons to stop (author info, bad files).
    if return_more_info(args) > 0:
        print ('\n')
        sys.exit()

    # '--name' is only "required" by convention (argparse does not enforce
    # it), so guard against None before it is concatenated below.
    if not args.name:
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' No 10-character code '
            'was provided (--name).\n\nDouble-check then try again!\n\n'+color.END)
        print ('\n')
        sys.exit()

    # Parent of the folder that contains the input file.  Built with os.path
    # so that inputs such as 'ToRename/x.fasta' or 'x.fasta' resolve to '.'
    # or '..' instead of an empty string (which would make every derived
    # path start at the filesystem root).
    input_dir = os.path.dirname(args.input_file) or os.curdir
    args.all_output_folder = os.path.normpath(os.path.join(input_dir, os.pardir))

    # The TSV name uses either '.' or '_' right before 'allOGCleanresults'.
    tsv_name = args.input_TSV.split('/')[-1]
    if '.allOGCleanresults' in args.input_TSV:
        marker, swap = '.allOGCleanresults.', '.AA.ORF.'
    else:
        marker, swap = '_allOGCleanresults.', '_AA.ORF.'

    args.out_XML = (args.name + '_XX_'
        + tsv_name.replace(marker, swap).replace('.tsv','.fasta')
        + '_1e-10keepall_BlastOutall.oneHit')

    args.file_prefix = args.input_file.split('/')[-1].split('_Filtered.Final')[0]
    if 'fasta' in args.file_prefix:
        # The '_Filtered.Final' marker was absent, so the split returned the
        # whole file name; use the 10-character code instead.
        args.file_prefix = args.name

    args.r2g_aa = args.all_output_folder + '/ReadyToGo/ReadyToGo_AA/'
    args.r2g_ntd = args.all_output_folder + '/ReadyToGo/ReadyToGo_NTD/'
    args.r2g_tsv = args.all_output_folder + '/ReadyToGo/ReadyToGo_TSV/'
    args.r2g_xml = args.all_output_folder + '/ReadyToGo/ReadyToGo_XML/'

    return args
|
||||
|
||||
|
||||
###########################################################################################
|
||||
###------------------------------- Script Usage Message --------------------------------###
|
||||
###########################################################################################
|
||||
|
||||
def usage_msg():
    """Return the coloured example command line shown in the help text."""
    example_command = (' python 7_FinalizeName.py'
        ' --input_file ../ToRename/Op_me_Xxma_Filtered.Final.AA.ORF.fasta --name Op_me_Xxma')

    return ''.join([
        color.BOLD,
        color.RED,
        '\n\nExample usage:',
        color.CYAN,
        example_command,
        color.END,
    ])
|
||||
|
||||
|
||||
##########################################################################################
|
||||
###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
|
||||
##########################################################################################
|
||||
|
||||
def return_more_info(args):
    """Handle ``-author`` and check that the three input files exist.

    ``args.input_file`` must end in ``AA.ORF.fasta`` or ``NTD.ORF.fasta``.
    From it the companion paths are derived and stored on ``args`` as
    ``input_AA``, ``input_NTD`` and ``input_TSV`` (same stem, suffix
    ``allOGCleanresults.tsv``).

    Args:
        args: the namespace produced by ``argparse``; it is modified in place.

    Returns:
        int: number of reasons the caller should stop (0 means "carry on").
        Printing the author information counts as one such reason.
    """
    valid_args = 0

    author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
        ' maurerax@gmail.com\n\n'+color.END)

    if args.author == True:
        print (author)
        valid_args += 1

    input_file = args.input_file

    if not input_file:
        # Nothing to validate.  Stay quiet if the user only asked for the
        # author information; otherwise report the missing option.
        if valid_args == 0:
            print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' No input Fasta file '
                'was provided (--input_file).\n\nDouble-check then try again!\n\n'+color.END)
            valid_args += 1
        return valid_args

    aa_suffix, ntd_suffix, tsv_suffix = 'AA.ORF.fasta', 'NTD.ORF.fasta', 'allOGCleanresults.tsv'

    # Strip the suffix by slicing so that only the END of the path changes
    # (str.replace would also rewrite a matching directory name).
    if input_file.endswith(aa_suffix):
        stem = input_file[:-len(aa_suffix)]
    elif input_file.endswith(ntd_suffix):
        stem = input_file[:-len(ntd_suffix)]
    else:
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The input file ('
            +color.DARKCYAN+input_file.split('/')[-1]+color.END+color.BOLD+')\nmust end with '
            'AA.ORF.fasta or NTD.ORF.fasta.\n\nDouble-check then try again!\n\n'+color.END)
        return valid_args + 1

    args.input_AA = stem + aa_suffix
    args.input_NTD = stem + ntd_suffix
    # Previously the NTD branch replaced 'AA.ORF.fasta' here, which never
    # matched and left the TSV path pointing at the NTD fasta file.
    args.input_TSV = stem + tsv_suffix

    print(args.input_TSV)

    # (label used in the message, path to check) -- the trailing blank in
    # 'TSV ' keeps that message identical to the historical wording.
    expected_files = (
        ('Nucleotide Fasta', args.input_NTD),
        ('Protein Fasta', args.input_AA),
        ('TSV ', args.input_TSV),
    )

    for label, path in expected_files:
        if not os.path.isfile(path):
            print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided '+label
                +' file ('+color.DARKCYAN+path.split('/')[-1]+color.END+color.BOLD+')\ndoes not'
                ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
            valid_args += 1

    return valid_args
|
||||
|
||||
###########################################################################################
|
||||
###-------------------- Double Checks Format for 10-Character Code ---------------------###
|
||||
###########################################################################################
|
||||
|
||||
def check_code(args):
    """Validate the 10-character taxon code held in ``args.name``.

    A valid code is exactly 10 characters long and consists of three
    underscore-separated parts of 2, 2 and 4 characters (e.g. ``Sr_ci_Cunc``).
    On success a short "Renaming ..." notice is printed; otherwise an
    explanation with three examples is printed and the script exits.
    """
    pieces = args.name.split('_')

    examples = ('Three examples below:\n'+color.CYAN+'\n\tSr_ci_Cunc\n\n\tOp_me_Hsap\n\n\t'
        'Am_ar_Ehis\n\n'+color.END)

    # Wrong overall length gets its own message.
    if len(args.name) != 10:
        print (color.BOLD+'\n\nNew Species Prefix is not 10 characters long\n\n')
        print (examples)
        sys.exit()

    # Right length, but not laid out as XX_XX_XXXX.
    well_formed = len(pieces) == 3 and [len(part) for part in pieces] == [2, 2, 4]
    if not well_formed:
        print (color.BOLD+'\n\nCheck the format of your Species Prefix!\n\n')
        print (examples)
        sys.exit()

    current_label = args.input_file.split('/')[-1].split('_Filtered')[0]
    print (color.BOLD+"\n\nRenaming "+color.ORANGE+current_label
        +color.END+color.BOLD+"'s files with the following 10-character\n"
        "code: "+color.CYAN+args.name+color.END+'\n')
|
||||
|
||||
|
||||
##########################################################################################
|
||||
###------------------------- Creates Folders For Storing Data -------------------------###
|
||||
##########################################################################################
|
||||
|
||||
def prep_folders(args):
    """Create the output folders used by the remaining steps.

    The folders are ``<all_output_folder>/ReadyToGo``, its four sub-folders
    (``args.r2g_ntd``, ``args.r2g_aa``, ``args.r2g_tsv``, ``args.r2g_xml``)
    and ``<all_output_folder>/<file_prefix>/Renamed``.

    ``os.makedirs`` is used instead of a shell ``mkdir`` so that paths with
    spaces or shell metacharacters work, missing parent folders are created,
    and already existing folders are left untouched.

    Raises:
        OSError: if a folder cannot be created (e.g. a regular file is in the
            way or permissions are missing).
    """
    wanted = (
        args.all_output_folder + '/ReadyToGo',
        args.r2g_ntd,
        args.r2g_aa,
        args.r2g_tsv,
        args.r2g_xml,
        args.all_output_folder + '/' + args.file_prefix + '/Renamed',
    )

    for folder in wanted:
        os.makedirs(folder, exist_ok=True)
|
||||
|
||||
###########################################################################################
|
||||
###----------- Renames the NTD and AA CDSs with the Given 10-Character Code ------------###
|
||||
###########################################################################################
|
||||
|
||||
def rename_paralogs(args):
    """Prefix every sequence name with ``<name>_XX_`` and write the copies.

    Reads ``args.input_AA``, ``args.input_NTD`` and ``args.input_TSV`` and
    writes the renamed versions to
    ``<all_output_folder>/<file_prefix>/Renamed/<name>_XX_<original file name>``.

    * Fasta files: every ``>`` is replaced by ``><name>_XX_``.
    * TSV file: the prefix is added to the start of every line.  If the file
      contains blank separator lines (``\\n\\n``) those are collapsed to a
      single newline in the process.  No trailing newline is written.

    All inputs are read inside ``with`` blocks so their handles are closed,
    and the TSV is read once only.
    """
    home_folder = args.all_output_folder + '/' + args.file_prefix + '/Renamed/'
    tag = args.name + '_XX_'

    print (color.BOLD+'\nRenaming Translated (Protein) '+color.PURPLE+'ORFs\n'+color.END)
    with open(args.input_AA) as handle:
        renamed_Final_Prots = handle.read().replace('>', '>' + tag)

    print (color.BOLD+'\nRenaming Nucleotide '+color.PURPLE+'ORFs\n'+color.END)
    with open(args.input_NTD) as handle:
        renamed_Final_Nucs = handle.read().replace('>', '>' + tag)

    print (color.BOLD+'\nUpdating CDS Names in the Spreadsheet'+color.END)
    with open(args.input_TSV) as handle:
        tsv_text = handle.read()

    # Rows are separated either by blank lines or by single newlines.
    row_break = '\n\n' if '\n\n' in tsv_text else '\n'
    renamed_Final_tsv = tag + tsv_text.rstrip('\n').replace(row_break, '\n' + tag)

    outputs = (
        (args.input_AA, renamed_Final_Prots),
        (args.input_NTD, renamed_Final_Nucs),
        (args.input_TSV, renamed_Final_tsv),
    )

    for source_path, content in outputs:
        with open(home_folder + tag + source_path.split('/')[-1], 'w') as out:
            out.write(content)
|
||||
|
||||
|
||||
###########################################################################################
|
||||
###--------------------------------- Header/Tail Lines ---------------------------------###
|
||||
###########################################################################################
|
||||
|
||||
def header_tail():
    """Return the fixed opening and closing text of the fake BLAST XML file.

    Returns:
        tuple: ``(header, tail)`` -- ``header`` is the XML declaration,
        DOCTYPE and the leading ``<BlastOutput>`` fields (each line ends with
        a newline); ``tail`` closes ``<BlastOutput_iterations>`` and
        ``<BlastOutput>``.
    """
    header_lines = [
        '<?xml version="1.0"?>',
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
        '<BlastOutput>',
        ' <BlastOutput_program>blastp</BlastOutput_program>',
        ' <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>',
        ' <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>',
        ' <BlastOutput_db>../OGBlastDB/renamed_aa_seqs_OrthoMCL-5_12653.fasta</BlastOutput_db>',
        ' <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>',
    ]
    header = '\n'.join(header_lines) + '\n'

    tail = '\n'.join(['</BlastOutput_iterations>', '</BlastOutput>'])

    return header, tail
|
||||
|
||||
|
||||
###########################################################################################
|
||||
###------------------------------- TSV to XML Conversion -------------------------------###
|
||||
###########################################################################################
|
||||
|
||||
def _tsv_query_length(columns):
    """Return ``abs(stop - start + 1)`` for one TSV row, as a string."""
    return str(abs(int(columns[-3]) - int(columns[-4]) + 1))


def _blast_preamble(columns):
    """Return the query/parameter section that precedes the first iteration.

    It is built from the first TSV row and ends by opening
    ``<BlastOutput_iterations>``.
    """
    return ''.join([
        ' <BlastOutput_query-def>', columns[0], '</BlastOutput_query-def>\n',
        ' <BlastOutput_query-len>', _tsv_query_length(columns), '</BlastOutput_query-len>\n',
        ' <BlastOutput_param>\n <Parameters>\n <Parameters_matrix>BLOSUM62</Parameters_matrix>\n',
        ' <Parameters_expect>1e-10</Parameters_expect>\n',
        ' <Parameters_gap-open>11</Parameters_gap-open>\n <Parameters_gap-extend>1</Parameters_gap-extend>\n',
        ' <Parameters_filter>F</Parameters_filter>\n',
        ' </Parameters>\n </BlastOutput_param>\n<BlastOutput_iterations>\n',
    ])


def _blast_iteration(number, columns):
    """Return the fake ``<Iteration>`` element for one TSV row.

    ``number`` is the 1-based row number (used for ``Iteration_iter-num`` and
    ``Query_<n>``).  From ``columns`` the first field is the query name, the
    second the hit name, and the last four are start, stop, e-value and score.
    """
    query, hit = columns[0], columns[1]
    start, stop, evalue, score = columns[-4], columns[-3], columns[-2], columns[-1]

    length = _tsv_query_length(columns)
    # Identity / positive / align-len all use the plain coordinate distance.
    span = str(abs(int(stop) - int(start)))
    index = str(number)

    return ''.join([
        '<Iteration>\n <Iteration_iter-num>', index, '</Iteration_iter-num>\n',
        ' <Iteration_query-ID>Query_', index, '</Iteration_query-ID>\n',
        ' <Iteration_query-def>', query, '</Iteration_query-def>\n',
        ' <Iteration_query-len>', length, '</Iteration_query-len>\n',
        '<Iteration_hits>\n<Hit>\n <Hit_num>1</Hit_num>\n <Hit_id>Fake_Entry</Hit_id>\n',
        ' <Hit_def>', hit, '</Hit_def>\n <Hit_accession>Fake_Accession</Hit_accession>\n',
        ' <Hit_len>', length, '</Hit_len>\n <Hit_hsps>\n <Hsp>\n <Hsp_num>1</Hsp_num>\n',
        ' <Hsp_bit-score>1234</Hsp_bit-score>\n',
        ' <Hsp_score>', score, '</Hsp_score>\n <Hsp_evalue>', evalue, '</Hsp_evalue>\n',
        ' <Hsp_query-from>', start, '</Hsp_query-from>\n',
        ' <Hsp_query-to>', stop, '</Hsp_query-to>\n',
        ' <Hsp_hit-from>', start, '</Hsp_hit-from>\n <Hsp_hit-to>', stop, '</Hsp_hit-to>\n',
        ' <Hsp_query-frame>0</Hsp_query-frame>\n <Hsp_hit-frame>0</Hsp_hit-frame>\n',
        ' <Hsp_identity>', span, '</Hsp_identity>\n',
        ' <Hsp_positive>', span, '</Hsp_positive>\n <Hsp_gaps>0</Hsp_gaps>\n',
        ' <Hsp_align-len>', span, '</Hsp_align-len>\n',
        ' <Hsp_qseq></Hsp_qseq>\n <Hsp_hseq></Hsp_hseq>\n <Hsp_midline></Hsp_midline>\n',
        ' </Hsp>\n </Hit_hsps>\n</Hit>\n',
        '\n</Iteration_hits>\n <Iteration_stat>\n <Statistics>\n',
        ' <Statistics_db-num>379660</Statistics_db-num>\n <Statistics_db-len>197499634</Statistics_db-len>\n',
        ' <Statistics_hsp-len>123</Statistics_hsp-len>\n <Statistics_eff-space>184705217500</Statistics_eff-space>\n',
        ' <Statistics_kappa>0.041</Statistics_kappa>\n',
        ' <Statistics_lambda>0.267</Statistics_lambda>\n <Statistics_entropy>0.14</Statistics_entropy>\n',
        ' </Statistics>\n </Iteration_stat>\n</Iteration>\n',
    ])


def convert_TSV_data(args):
    """Turn the renamed TSV into a list of fake BLAST XML fragments.

    The TSV read is
    ``<all_output_folder>/<file_prefix>/Renamed/<name>_XX_<TSV file name>``;
    blank lines are skipped.  One string is produced per row.  The first one
    additionally starts with the query/parameter section that opens
    ``<BlastOutput_iterations>``.

    Returns:
        list of str: XML fragments in row order (empty if the TSV has no rows).

    Raises:
        IndexError / ValueError: if a row has too few tab-separated fields or
            its start/stop fields are not integers.
    """
    home_folder = args.all_output_folder + '/' + args.file_prefix + '/Renamed/'

    TSVforConvert = home_folder+args.name+'_XX_'+args.input_TSV.split('/')[-1]

    # Split every row once, and close the file as soon as it has been read.
    with open(TSVforConvert) as handle:
        rows = [line.rstrip('\n').split('\t') for line in handle if line != '\n']

    iterations = []

    for number, columns in enumerate(rows, 1):
        fragment = _blast_iteration(number, columns)
        if number == 1:
            fragment = _blast_preamble(columns) + fragment
        iterations.append(fragment)

    return iterations
|
||||
|
||||
|
||||
###########################################################################################
|
||||
###--------------------------- Writes Out the Fake XML File ----------------------------###
|
||||
###########################################################################################
|
||||
|
||||
def write_Fake_XML(args):
    """Write the fake BLAST XML file for the renamed TSV.

    The file is ``<all_output_folder>/<file_prefix>/<out_XML>`` and consists
    of the header from ``header_tail``, the fragments returned by
    ``convert_TSV_data`` and the closing tail, in that order.
    """
    source_name = args.input_file.split('/')[-1]
    print (color.BOLD+'\n\nConverting '+color.ORANGE+source_name+color.END
        +color.BOLD+' to XML format\n'+color.END)

    header, tail = header_tail()
    body = ''.join(convert_TSV_data(args))

    destination = args.all_output_folder + '/' + args.file_prefix + '/' + args.out_XML
    with open(destination,'w+') as xml_out:
        xml_out.write(header + body + tail)
|
||||
|
||||
##########################################################################################
|
||||
###-------------------- Cleans up the Folder and Moves Final Files --------------------###
|
||||
##########################################################################################
|
||||
def clean_up(args):
    """Copy the final files to their destinations and archive the work folder.

    Steps, in order:

    1. Copy the fake XML to ``args.r2g_xml``.
    2. Copy every ``*tsv`` in the ``Renamed`` folder to ``args.r2g_tsv`` and
       the renamed AA / NTD fasta files to ``args.r2g_aa`` / ``args.r2g_ntd``.
    3. Copy the renamed TSV / AA / NTD files up into
       ``<all_output_folder>/<file_prefix>``.
    4. Delete every file in ``<all_output_folder>/ToRename`` whose name
       contains ``file_prefix``.
    5. Move ``<all_output_folder>/<file_prefix>`` into
       ``<all_output_folder>/Finished`` (created if needed).

    ``glob``/``shutil`` replace the former shell commands so that paths with
    spaces or shell metacharacters are handled correctly.

    Raises:
        OSError: if a copy, delete or move fails (the shell commands used
            previously failed silently).
    """
    # Imported here so the block does not depend on the file-level imports.
    import glob
    import shutil

    taxon_folder = args.all_output_folder + '/' + args.file_prefix
    home_folder = taxon_folder + '/Renamed/'
    # Escape the directory part so only the '*' wildcards below are special.
    renamed = glob.escape(home_folder)

    def copy_matching(pattern, destination):
        """Copy every regular file matching ``pattern`` into ``destination``."""
        for path in sorted(glob.glob(pattern)):
            if os.path.isfile(path):
                shutil.copy(path, destination)

    shutil.copy(taxon_folder + '/' + args.out_XML, args.r2g_xml)

    copy_matching(renamed + '*tsv', args.r2g_tsv)
    copy_matching(renamed + '*_XX_*AA.ORF.fasta', args.r2g_aa)
    copy_matching(renamed + '*_XX_*NTD.ORF.fasta', args.r2g_ntd)

    for suffix in ('tsv', 'AA.ORF.fasta', 'NTD.ORF.fasta'):
        copy_matching(renamed + '*_XX_*' + suffix, taxon_folder)

    # Remove this taxon's inputs from ToRename (files only, like plain 'rm').
    stale = (glob.escape(args.all_output_folder + '/ToRename/') + '*'
        + glob.escape(args.file_prefix) + '*')
    for path in glob.glob(stale):
        if os.path.isfile(path) or os.path.islink(path):
            os.remove(path)

    finished = args.all_output_folder + '/Finished'
    os.makedirs(finished, exist_ok=True)

    archived = finished + '/' + args.file_prefix
    if os.path.exists(archived):
        # Do not clobber the results of an earlier run; leave the working
        # folder where it is and tell the user.
        print ('\nWarning: ' + archived + ' already exists; ' + taxon_folder
            + ' was not moved.\n', file=sys.stderr)
    else:
        shutil.move(taxon_folder, archived)
|
||||
|
||||
###########################################################################################
|
||||
###-------------------------------- Next Script Message --------------------------------###
|
||||
###########################################################################################
|
||||
|
||||
def next_script(args):
    """Print the closing message telling the user where the final files are.

    Both names shown in the message are cut out of ``args.out_XML``: the text
    before ``_XX`` and the text between ``_XX_`` and ``_Filtered``.
    """
    taxon_code = args.out_XML.split('_XX')[0]
    source_label = args.out_XML.split('_XX_')[-1].split('_Filtered')[0]

    message = ''.join([
        color.BOLD, '\nThere is no next script! The final ',
        color.ORANGE, taxon_code,
        color.END, color.BOLD, ' files can be\nfound in the ',
        color.RED, source_label,
        color.END, color.BOLD, ' and ',
        color.RED, 'ReadyToGo folders',
        color.END, color.BOLD, ' and are ready\n'
        'for the KatzLab Phylogenomic Tree-Building Steps!\n\n',
        color.END,
    ])

    print (message)
|
||||
|
||||
##########################################################################################
|
||||
###--------------- Checks Command Line Arguments and Calls on Functions ---------------###
|
||||
##########################################################################################
|
||||
|
||||
def main():
    """Run the whole renaming workflow, one step after another.

    Order: parse/validate arguments, validate the 10-character code, create
    the output folders, rename the fasta/TSV files, write the fake BLAST XML,
    tidy up the folders, and print the closing message.
    """
    args = check_args()

    check_code(args)

    prep_folders(args)

    rename_paralogs(args)

    write_Fake_XML(args)

    clean_up(args)

    next_script(args)


# Only run the workflow when executed as a script, so that importing this
# module (e.g. to reuse a helper) does not parse sys.argv or touch any files.
if __name__ == '__main__':
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user