From 4ae13828153d9eafb21a2d4fd122d3fc52f79b4f Mon Sep 17 00:00:00 2001 From: Godwin Ani Date: Mon, 5 Feb 2024 13:25:11 -0500 Subject: [PATCH] Update Cluster_v2.0.py --- Utilities/for_fastas/Cluster_v2.0.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Utilities/for_fastas/Cluster_v2.0.py b/Utilities/for_fastas/Cluster_v2.0.py index c664922..84915dc 100644 --- a/Utilities/for_fastas/Cluster_v2.0.py +++ b/Utilities/for_fastas/Cluster_v2.0.py @@ -25,10 +25,10 @@ def input_validation(value, error_message): print(error_message) exit(1) -def cluster_sequences(program, threshold, overlap, input_folder, output_folder): +def cluster_sequences(program, identity, overlap, input_folder, output_folder): for file in tqdm(os.listdir(input_folder)): if file.endswith('.fasta'): - subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{threshold}', '-d', '0', '-aS', f'{overlap}']) + subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}']) for file in os.listdir(output_folder): if file.endswith('.clstr'): @@ -52,13 +52,13 @@ def main(): os.mkdir(args.output) if args.type == 'aa': - threshold = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.') + identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.') overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.') - cluster_sequences('cd-hit', threshold, overlap, args.input, args.output) + cluster_sequences('cd-hit', identity, overlap, args.input, args.output) elif args.type == 'dna': - threshold = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.') + identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.') overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.') - cluster_sequences('cd-hit-est', threshold, overlap, args.input, args.output) + cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output) else: print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.') exit(1)