mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2026-02-11 05:00:25 +08:00
Version of scripts used to successfully run conserved OGs on 11/13/23
Recent updates: Similarity filter adjusted to have on/off parameter Blacklist implemented & tested Output list of Guidance-removed sequences with seq scores Temp/intermediate file naming issue fixed
This commit is contained in:
parent
842b229833
commit
1b587bf266
@ -16,18 +16,18 @@ def run(params):
|
|||||||
if len([f for f in os.listdir(preguidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]) == 0:
|
if len([f for f in os.listdir(preguidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]) == 0:
|
||||||
Logger.Error('No pre-Guidance (unaligned) files could be found at the path ' + preguidance_path + '. Make sure that the --start and --data parameters are correct, that the pre-Guidance step ran successfully, and that the unaligned files are formatted correctly (they must have the file extension .faa, .fa, or .fasta).')
|
Logger.Error('No pre-Guidance (unaligned) files could be found at the path ' + preguidance_path + '. Make sure that the --start and --data parameters are correct, that the pre-Guidance step ran successfully, and that the unaligned files are formatted correctly (they must have the file extension .faa, .fa, or .fasta).')
|
||||||
|
|
||||||
os.mkdir(params.output + '/Output/Temp/Guidance')
|
os.mkdir(params.output + '/Output/Intermediate/Guidance')
|
||||||
os.mkdir(params.output + '/Output/Temp/Guidance/Input')
|
os.mkdir(params.output + '/Output/Intermediate/Guidance/Input')
|
||||||
os.mkdir(params.output + '/Output/Temp/Guidance/Output')
|
os.mkdir(params.output + '/Output/Intermediate/Guidance/Output')
|
||||||
|
|
||||||
guidance_input = params.output + '/Output/Temp/Guidance/Input/'
|
guidance_input = params.output + '/Output/Intermediate/Guidance/Input/'
|
||||||
os.system('cp -r ' + preguidance_path + '/* ' + guidance_input)
|
os.system('cp -r ' + preguidance_path + '/* ' + guidance_input)
|
||||||
|
|
||||||
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
|
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
|
||||||
guidance_removed_file.write('Sequence\tScore\n')
|
guidance_removed_file.write('Sequence\tScore\n')
|
||||||
|
|
||||||
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
||||||
tax_guidance_outdir = params.output + '/Output/Temp/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
tax_guidance_outdir = params.output + '/Output/Intermediate/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
||||||
os.mkdir(tax_guidance_outdir)
|
os.mkdir(tax_guidance_outdir)
|
||||||
|
|
||||||
fail = False
|
fail = False
|
||||||
@ -46,7 +46,7 @@ def run(params):
|
|||||||
else:
|
else:
|
||||||
mafft_alg = 'auto'
|
mafft_alg = 'auto'
|
||||||
|
|
||||||
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Temp/Guidance/Output/' + file[:10] + '/log.txt')
|
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||||
|
|
||||||
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
|
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
|
||||||
seqs_below = len([line for line in open(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names').readlines()[1:-1] if float(line.split()[-1]) < params.seq_cutoff])
|
seqs_below = len([line for line in open(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names').readlines()[1:-1] if float(line.split()[-1]) < params.seq_cutoff])
|
||||||
@ -60,7 +60,7 @@ def run(params):
|
|||||||
Logger.Message('Guidance complete after ' + str(i + 1) + ' iterations for gene family ' + file.split('.')[0].split('_preguidance')[0])
|
Logger.Message('Guidance complete after ' + str(i + 1) + ' iterations for gene family ' + file.split('.')[0].split('_preguidance')[0])
|
||||||
break
|
break
|
||||||
|
|
||||||
for line in seqs_below:
|
for line in [line for line in open(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names').readlines()[1:-1] if float(line.split()[-1]) < params.seq_cutoff]:
|
||||||
guidance_removed_file.write(line)
|
guidance_removed_file.write(line)
|
||||||
|
|
||||||
os.system('cp ' + tax_guidance_outdir + '/Seqs.Orig.fas.FIXED.Without_low_SP_Seq.With_Names ' + guidance_input + '/' + file)
|
os.system('cp ' + tax_guidance_outdir + '/Seqs.Orig.fas.FIXED.Without_low_SP_Seq.With_Names ' + guidance_input + '/' + file)
|
||||||
|
|||||||
@ -24,7 +24,7 @@ def run(params):
|
|||||||
if(len(missing_taxa) > 0):
|
if(len(missing_taxa) > 0):
|
||||||
Logger.Warning('The following taxa in the taxon list are missing amino-acid files in ' + params.data + ':\n' + '\n'.join(['\t' + t for t in missing_taxa]))
|
Logger.Warning('The following taxa in the taxon list are missing amino-acid files in ' + params.data + ':\n' + '\n'.join(['\t' + t for t in missing_taxa]))
|
||||||
|
|
||||||
os.mkdir(params.output + '/Output/Temp/SF_Diamond')
|
os.mkdir(params.output + '/Output/Intermediate/SF_Diamond')
|
||||||
|
|
||||||
for og in ogs:
|
for og in ogs:
|
||||||
Logger.Message('Processing ' + og)
|
Logger.Message('Processing ' + og)
|
||||||
@ -41,9 +41,9 @@ def run(params):
|
|||||||
if params.similarity_filter:
|
if params.similarity_filter:
|
||||||
if len(recs) > 1:
|
if len(recs) > 1:
|
||||||
while flag == 0:
|
while flag == 0:
|
||||||
master_file_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle)
|
master_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle)
|
||||||
query_file_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.faa'
|
query_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.faa'
|
||||||
diamond_out_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_diamond_results_' + str(cycle) + '.tsv'
|
diamond_out_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_diamond_results_' + str(cycle) + '.tsv'
|
||||||
|
|
||||||
open(master_file_name + '.faa', 'w').write('>' + recs[0].id + '\n' + str(recs[0].seq) + '\n\n')
|
open(master_file_name + '.faa', 'w').write('>' + recs[0].id + '\n' + str(recs[0].seq) + '\n\n')
|
||||||
masters.append(recs[0])
|
masters.append(recs[0])
|
||||||
@ -61,7 +61,7 @@ def run(params):
|
|||||||
line = line.strip().split('\t')
|
line = line.strip().split('\t')
|
||||||
|
|
||||||
if float(line[2])/100 >= params.sim_cutoff:
|
if float(line[2])/100 >= params.sim_cutoff:
|
||||||
recs_to_remove.append(seq); removed =+ 1
|
recs_to_remove.append(line[0]); removed =+ 1
|
||||||
|
|
||||||
if len([rec for rec in recs[1:] if rec.id not in recs_to_remove]) < 2:
|
if len([rec for rec in recs[1:] if rec.id not in recs_to_remove]) < 2:
|
||||||
recs = [rec for rec in recs[1:] if rec.id not in recs_to_remove]
|
recs = [rec for rec in recs[1:] if rec.id not in recs_to_remove]
|
||||||
@ -76,7 +76,7 @@ def run(params):
|
|||||||
preguidance_file.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n')
|
preguidance_file.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n')
|
||||||
|
|
||||||
if(not params.keep_temp):
|
if(not params.keep_temp):
|
||||||
os.system('rm -r ' + params.output + '/Output/Temp/SF_Diamond')
|
os.system('rm -r ' + params.output + '/Output/Intermediate/SF_Diamond')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -19,10 +19,10 @@ def run(params):
|
|||||||
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
|
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
|
||||||
|
|
||||||
if params.tree_method == 'iqtree':
|
if params.tree_method == 'iqtree':
|
||||||
if not os.path.isdir(params.output + '/Output/Temp/IQTree'):
|
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
|
||||||
os.mkdir(params.output + '/Output/Temp/IQTree')
|
os.mkdir(params.output + '/Output/Intermediate/IQTree')
|
||||||
|
|
||||||
tax_iqtree_outdir = params.output + '/Output/Temp/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||||
os.mkdir(tax_iqtree_outdir)
|
os.mkdir(tax_iqtree_outdir)
|
||||||
|
|
||||||
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_IQTree')
|
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_IQTree')
|
||||||
@ -34,10 +34,10 @@ def run(params):
|
|||||||
Logger.Warning('No tree file created by IQ-Tree for OG ' + file[:10])
|
Logger.Warning('No tree file created by IQ-Tree for OG ' + file[:10])
|
||||||
|
|
||||||
elif params.tree_method == 'raxml':
|
elif params.tree_method == 'raxml':
|
||||||
if not os.path.isdir(params.output + '/Output/Temp/RAxML'):
|
if not os.path.isdir(params.output + '/Output/Intermediate/RAxML'):
|
||||||
os.mkdir(params.output + '/Output/Temp/RAxML')
|
os.mkdir(params.output + '/Output/Intermediate/RAxML')
|
||||||
|
|
||||||
tax_raxml_outdir = params.output + '/Output/Temp/RAxML/' + file.split('.')[0].split('_preguidance')[0]
|
tax_raxml_outdir = params.output + '/Output/Intermediate/RAxML/' + file.split('.')[0].split('_preguidance')[0]
|
||||||
os.mkdir(tax_raxml_outdir)
|
os.mkdir(tax_raxml_outdir)
|
||||||
|
|
||||||
os.system('./Scripts/trimal-trimAl/source/trimal -in ' + guidance_path + '/' + file + ' -phylip -out ' + tax_raxml_outdir + '/aligned.phy')
|
os.system('./Scripts/trimal-trimAl/source/trimal -in ' + guidance_path + '/' + file + ' -phylip -out ' + tax_raxml_outdir + '/aligned.phy')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user