mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2026-02-10 21:10:25 +08:00
Adjust the similarity filter; add the score to the Guidance removed-sequences output
This commit is contained in:
parent
72660b742b
commit
43a7439a02
@ -23,6 +23,9 @@ def run(params):
|
||||
guidance_input = params.output + '/Output/Temp/Guidance/Input/'
|
||||
os.system('cp -r ' + preguidance_path + '/* ' + guidance_input)
|
||||
|
||||
guidance_removed_file = open(params.output + '/Output/GuidanceRemovedSeqs.txt', 'w')
|
||||
guidance_removed_file.write('Sequence\tScore\n')
|
||||
|
||||
for file in [f for f in os.listdir(guidance_input) if f.endswith('.fa') or f.endswith('.faa') or f.endswith('.fasta')]:
|
||||
tax_guidance_outdir = params.output + '/Output/Temp/Guidance/Output/' + file.split('.')[0].split('_preguidance')[0]
|
||||
os.mkdir(tax_guidance_outdir)
|
||||
@ -57,6 +60,9 @@ def run(params):
|
||||
Logger.Message('Guidance complete after ' + str(i + 1) + ' iterations for gene family ' + file.split('.')[0].split('_preguidance')[0])
|
||||
break
|
||||
|
||||
for line in seqs_below:
|
||||
guidance_removed_file.write(line)
|
||||
|
||||
os.system('cp ' + tax_guidance_outdir + '/Seqs.Orig.fas.FIXED.Without_low_SP_Seq.With_Names ' + guidance_input + '/' + file)
|
||||
|
||||
os.system('rm -r ' + tax_guidance_outdir + '/*')
|
||||
@ -100,9 +106,7 @@ def run(params):
|
||||
else:
|
||||
os.system('mv ' + tax_guidance_outdir + '/' + gdir_file + ' ' + tax_guidance_outdir + '/' + file.split('.')[0].split('_preguidance')[0] + '_' + gdir_file)
|
||||
|
||||
|
||||
|
||||
|
||||
guidance_removed_file.close()
|
||||
|
||||
|
||||
|
||||
|
||||
@ -38,7 +38,7 @@ def run(params):
|
||||
Logger.Warning('\tThe sequence ID ' + rec.description + ' is invalid. Please make sure that sequence IDs contain no spaces, tabs, etc. This sequence is being excluded.')
|
||||
|
||||
masters = []; removed = 0; flag = 0; cycle = 0
|
||||
if params.sim_cutoff < 1:
|
||||
if params.similarity_filter:
|
||||
if len(recs) > 1:
|
||||
while flag == 0:
|
||||
master_file_name = params.output + '/Output/Temp/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle)
|
||||
@ -60,7 +60,7 @@ def run(params):
|
||||
for line in diamond_out:
|
||||
line = line.strip().split('\t')
|
||||
|
||||
if float(line[2])/100 > params.sim_cutoff:
|
||||
if float(line[2])/100 >= params.sim_cutoff:
|
||||
recs_to_remove.append(seq); removed =+ 1
|
||||
|
||||
if len([rec for rec in recs[1:] if rec.id not in recs_to_remove]) < 2:
|
||||
|
||||
@ -24,8 +24,8 @@ def get_params():
|
||||
core = parser.add_argument_group('Core parameters (rarely altered from the defaults)')
|
||||
core.add_argument('--blast_cutoff', default = 1e-20, type = float, help = 'Blast e-value cutoff')
|
||||
core.add_argument('--len_cutoff', default = 10, type = int, help = 'Amino acid length cutoff for removal of very short sequences after column removal in Guidance.')
|
||||
core.add_argument('--similarity_filter', action = 'store_true', help = 'Run the similarity filter in pre-Guidance')
|
||||
core.add_argument('--sim_cutoff', default = 1, type = float, help = 'Sequences from the same taxa that are assigned to the same OG are removed if they are more similar than this cutoff')
|
||||
core.add_argument('--overlap_cutoff', default = 0.35, type = float, help = 'A sequence is removed if its alignment length to the longest sequence in its OG & taxon is greater than this proportion of the length of the longest sequence')
|
||||
core.add_argument('--guidance_iters', default = 5, type = int, help = 'Number of Guidance iterations for sequence removal')
|
||||
core.add_argument('--seq_cutoff', default = 0.3, type = float, help = 'During guidance, taxa are removed if their score is below this cutoff')
|
||||
core.add_argument('--col_cutoff', default = 0.0, type = float, help = 'During guidance, columns are removed if their score is below this cutoff')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user