mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2026-02-10 16:50:27 +08:00
Version of scripts used to successfully run conserved OGs on 11/13/23
Recent updates: Similarity filter adjusted to have on/off parameter Blacklist implemented & tested Output list of Guidance-removed sequences with seq scores Temp/intermediate file naming issue fixed
This commit is contained in:
parent
842b229833
commit
1b587bf266
@ -16,18 +16,18 @@ def run(params):
|
||||
if len([f for f in os.listdir(preguidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]) == 0:
|
||||
Logger.Error('No pre-Guidance (unaligned) files could be found at the path ' + preguidance_path + '. Make sure that the --start and --data parameters are correct, that the pre-Guidance step ran successfully, and that the unaligned files are formatted correctly (they must have the file extension .faa, .fa, or .fasta).')
|
||||
|
||||
os.mkdir(params.output + '/Output/Temp/Guidance')
|
||||
os.mkdir(params.output + '/Output/Temp/Guidance/Input')
|
||||
os.mkdir(params.output + '/Output/Temp/Guidance/Output')
|
||||
os.mkdir(params.output + '/Output/Intermediate/Guidance')
|
||||
os.mkdir(params.output + '/Output/Intermediate/Guidance/Input')
|
||||
os.mkdir(params.output + '/Output/Intermediate/Guidance/Output')
|
||||
|
||||
guidance_input = params.output + '/Output/Temp/Guidance/Input/'
|
||||
guidance_input = params.output + '/Output/Intermediate/Guidance/Input/'
|
||||
os.system('cp -r ' + preguidance_path + '/* ' + guidance_input)
|
||||
|
||||
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
|
||||
guidance_removed_file.write('Sequence\tScore\n')
|
||||
|
||||
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
||||
tax_guidance_outdir = params.output + '/Output/Temp/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
||||
tax_guidance_outdir = params.output + '/Output/Intermediate/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_guidance_outdir)
|
||||
|
||||
fail = False
|
||||
@ -46,7 +46,7 @@ def run(params):
|
||||
else:
|
||||
mafft_alg = 'auto'
|
||||
|
||||
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Temp/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
os.system('Scripts/guidance.v2.02/www/Guidance/guidance.pl --seqFile ' + guidance_input + '/' + file + ' --msaProgram MAFFT --seqType aa --outDir ' + tax_guidance_outdir + ' --seqCutoff ' + str(params.seq_cutoff) + ' --colCutoff ' + str(params.col_cutoff) + " --outOrder as_input --bootstraps 10 --MSA_Param '\\--" + mafft_alg + " --maxiterate 1000 --thread " + str(params.guidance_threads) + " --bl 62 --anysymbol' > " + params.output + '/Output/Intermediate/Guidance/Output/' + file[:10] + '/log.txt')
|
||||
|
||||
if os.path.isfile(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names'):
|
||||
seqs_below = len([line for line in open(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names').readlines()[1:-1] if float(line.split()[-1]) < params.seq_cutoff])
|
||||
@ -60,7 +60,7 @@ def run(params):
|
||||
Logger.Message('Guidance complete after ' + str(i + 1) + ' iterations for gene family ' + file.split('.')[0].split('_preguidance')[0])
|
||||
break
|
||||
|
||||
for line in seqs_below:
|
||||
for line in [line for line in open(tax_guidance_outdir + '/MSA.MAFFT.Guidance2_res_pair_seq.scr_with_Names').readlines()[1:-1] if float(line.split()[-1]) < params.seq_cutoff]:
|
||||
guidance_removed_file.write(line)
|
||||
|
||||
os.system('cp ' + tax_guidance_outdir + '/Seqs.Orig.fas.FIXED.Without_low_SP_Seq.With_Names ' + guidance_input + '/' + file)
|
||||
|
||||
@ -24,7 +24,7 @@ def run(params):
|
||||
if(len(missing_taxa) > 0):
|
||||
Logger.Warning('The following taxa in the taxon list are missing amino-acid files in ' + params.data + ':\n' + '\n'.join(['\t' + t for t in missing_taxa]))
|
||||
|
||||
os.mkdir(params.output + '/Output/Temp/SF_Diamond')
|
||||
os.mkdir(params.output + '/Output/Intermediate/SF_Diamond')
|
||||
|
||||
for og in ogs:
|
||||
Logger.Message('Processing ' + og)
|
||||
@ -41,9 +41,9 @@ def run(params):
|
||||
if params.similarity_filter:
|
||||
if len(recs) > 1:
|
||||
while flag == 0:
|
||||
master_file_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle)
|
||||
query_file_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.faa'
|
||||
diamond_out_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_diamond_results_' + str(cycle) + '.tsv'
|
||||
master_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle)
|
||||
query_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.faa'
|
||||
diamond_out_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_diamond_results_' + str(cycle) + '.tsv'
|
||||
|
||||
open(master_file_name + '.faa', 'w').write('>' + recs[0].id + '\n' + str(recs[0].seq) + '\n\n')
|
||||
masters.append(recs[0])
|
||||
@ -61,7 +61,7 @@ def run(params):
|
||||
line = line.strip().split('\t')
|
||||
|
||||
if float(line[2])/100 >= params.sim_cutoff:
|
||||
recs_to_remove.append(seq); removed =+ 1
|
||||
recs_to_remove.append(line[0]); removed =+ 1
|
||||
|
||||
if len([rec for rec in recs[1:] if rec.id not in recs_to_remove]) < 2:
|
||||
recs = [rec for rec in recs[1:] if rec.id not in recs_to_remove]
|
||||
@ -76,7 +76,7 @@ def run(params):
|
||||
preguidance_file.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n')
|
||||
|
||||
if(not params.keep_temp):
|
||||
os.system('rm -r ' + params.output + '/Output/Temp/SF_Diamond')
|
||||
os.system('rm -r ' + params.output + '/Output/Intermediate/SF_Diamond')
|
||||
|
||||
|
||||
|
||||
|
||||
@ -19,10 +19,10 @@ def run(params):
|
||||
for file in [f for f in os.listdir(guidance_path) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta') or f.endswith('.fas') or f.endswith('.aln')]:
|
||||
|
||||
if params.tree_method == 'iqtree':
|
||||
if not os.path.isdir(params.output + '/Output/Temp/IQTree'):
|
||||
os.mkdir(params.output + '/Output/Temp/IQTree')
|
||||
if not os.path.isdir(params.output + '/Output/Intermediate/IQTree'):
|
||||
os.mkdir(params.output + '/Output/Intermediate/IQTree')
|
||||
|
||||
tax_iqtree_outdir = params.output + '/Output/Temp/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||
tax_iqtree_outdir = params.output + '/Output/Intermediate/IQTree/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_iqtree_outdir)
|
||||
|
||||
os.system('iqtree2 -s ' + guidance_path + '/' + file + ' -m LG+G --prefix ' + tax_iqtree_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_IQTree')
|
||||
@ -34,10 +34,10 @@ def run(params):
|
||||
Logger.Warning('No tree file created by IQ-Tree for OG ' + file[:10])
|
||||
|
||||
elif params.tree_method == 'raxml':
|
||||
if not os.path.isdir(params.output + '/Output/Temp/RAxML'):
|
||||
os.mkdir(params.output + '/Output/Temp/RAxML')
|
||||
if not os.path.isdir(params.output + '/Output/Intermediate/RAxML'):
|
||||
os.mkdir(params.output + '/Output/Intermediate/RAxML')
|
||||
|
||||
tax_raxml_outdir = params.output + '/Output/Temp/RAxML/' + file.split('.')[0].split('_preguidance')[0]
|
||||
tax_raxml_outdir = params.output + '/Output/Intermediate/RAxML/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_raxml_outdir)
|
||||
|
||||
os.system('./Scripts/trimal-trimAl/source/trimal -in ' + guidance_path + '/' + file + ' -phylip -out ' + tax_raxml_outdir + '/aligned.phy')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user