From b466a30a969fe7aeaecc78fb175ac92bbed85de1 Mon Sep 17 00:00:00 2001 From: Godwin Ani Date: Mon, 4 Dec 2023 14:54:57 -0500 Subject: [PATCH] Updated preguidance with no logger dependency --- PTL2/Scripts/preguidance.py | 99 ++++++++++++++----------------------- 1 file changed, 38 insertions(+), 61 deletions(-) diff --git a/PTL2/Scripts/preguidance.py b/PTL2/Scripts/preguidance.py index 95c3045..1ef3136 100644 --- a/PTL2/Scripts/preguidance.py +++ b/PTL2/Scripts/preguidance.py @@ -1,5 +1,4 @@ import os, sys, re -from logger import Logger from Bio import SeqIO @@ -8,53 +7,71 @@ def run(params): try: ogs = list(dict.fromkeys([line.strip() for line in open(params.gf_list)])) except (FileNotFoundError, TypeError) as e: - Logger.Error('Unable to read GF list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e)) + print('\nERROR: Unable to read GF list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e) + '\n') + exit() try: taxa = list(dict.fromkeys([line.strip() for line in open(params.taxon_list)])) except (FileNotFoundError, TypeError) as e: - Logger.Error('Unable to read taxon list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e)) + print('\nERROR: Unable to read taxon list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e) + '\n') + exit() - try: - sim_taxa = list(dict.fromkeys([line.strip() for line in open(params.sim_taxa)])) - except (FileNotFoundError, TypeError) as e: - Logger.Error('Unable to read similarity taxa list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e)) + if params.sim_taxa != None: + try: + sim_taxa = list(dict.fromkeys([line.strip() for line in open(params.sim_taxa)])) + except (FileNotFoundError, TypeError) as e: + print('\nERROR: Unable to read similarity taxa list file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e) + '\n') + exit() + else: + sim_taxa = 'all' + + if params.blacklist != None: + try: + blacklist_seqs = list(dict.fromkeys([line.strip() for line in open(params.blacklist)])) + except (FileNotFoundError, TypeError) as e: + print('\nERROR: Unable to read blacklist file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e) + '\n') + exit() + else: + blacklist_seqs = [] - try: - blacklist_seqs = list(dict.fromkeys([line.strip() for line in open(params.blacklist)])) - except (FileNotFoundError, TypeError) as e: - print('\nUnable to read blacklist file. Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e)) - if not os.path.isdir(params.data): - Logger.Error(Logger.Error('Input amino-acid data files not found. Please make sure that the given path (--data) is correct.')) + print('\nInput amino-acid data files not found. Please make sure that the given path (--data) is correct.\n') aa_files = [f for f in os.listdir(params.data) if f[:10] in taxa if f.endswith('.faa') or f.endswith('.fa') or f.endswith('.fasta')] missing_taxa = [tax for tax in taxa if tax not in [f[:10] for f in aa_files]] if(len(missing_taxa) > 0): - Logger.Warning('The following taxa in the taxon list are missing amino-acid files in ' + params.data + ':\n' + '\n'.join(['\t' + t for t in missing_taxa])) + print('\nWARNING: The following taxa in the taxon list are missing amino-acid files in ' + params.data + ':\n' + '\n'.join(['\t' + t for t in missing_taxa]) + '\n') os.mkdir(params.output + '/Output/Intermediate/SF_Diamond') removed_file = open(params.output + '/Output/Pre-Guidance/SimFilter_removed.txt', 'w') for og in ogs: - Logger.Message('Processing ' + og) - with open(params.output + '/Output/Pre-Guidance/' + og + '_preguidance.faa', 'w') as preguidance_file: + print('\nProcessing ' + og + '\n') + with open(params.output + '/Output/Pre-Guidance/' + og + '_preguidance.fasta', 'w') as preguidance_file: for taxon_file in aa_files: recs = [] for rec in sorted([rec for rec in SeqIO.parse(params.data + '/' + taxon_file, 'fasta') if rec.id[-10:] == og and rec.id not in blacklist_seqs and params.og_identifier in rec.id], key=lambda x: -len(x.seq)): if(rec.id == rec.description): recs.append(rec) else: - Logger.Warning('\tThe sequence ID ' + rec.description + ' is invalid. Please make sure that sequence IDs contain no spaces, tabs, etc. This sequence is being excluded.') + print('\n\tThe sequence ID ' + rec.description + ' is invalid. Please make sure that sequence IDs contain no spaces, tabs, etc. This sequence is being excluded.\n') + + if sim_taxa == 'all': + use_taxon = True + else: + if taxon_file[:10] in sim_taxa: + use_taxon = True + else: + use_taxon = False masters = []; removed = 0; flag = 0; cycle = 0 - if params.similarity_filter and taxon_file[:10] in sim_taxa: + if params.similarity_filter and use_taxon: if len(recs) > 1: while flag == 0: master_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_master_' + str(cycle) - query_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.faa' + query_file_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_queries_' + str(cycle) + '.fasta' diamond_out_name = params.output + '/Output/Intermediate/SF_Diamond/' + og + '_' + taxon_file[:10] + '_diamond_results_' + str(cycle) + '.tsv' open(master_file_name + '.faa', 'w').write('>' + recs[0].id + '\n' + str(recs[0].seq) + '\n\n') @@ -85,8 +102,8 @@ def run(params): for item in recs_to_remove: removed_file.write(f"{item}\n") - Logger.Message('\t' + str(removed) + ' sequence(s) removed by the similarity filter (' + str(cycle + 1) + ' iterations) from ' + taxon_file[:10]) - + print('\n\t' + str(removed) + ' sequence(s) removed by the similarity filter (' + str(cycle + 1) + ' iterations) from ' + taxon_file[:10] + '\n') + for rec in recs + masters: preguidance_file.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n') @@ -95,43 +112,3 @@ def run(params): if(not params.keep_temp): os.system('rm -r ' + params.output + '/Output/Intermediate/SF_Diamond') - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -