Update Cluster_v2.0.py

This commit is contained in:
Godwin Ani 2024-02-05 13:25:11 -05:00 committed by GitHub
parent f6c34517ae
commit 4ae1382815
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -25,10 +25,10 @@ def input_validation(value, error_message):
print(error_message)
exit(1)
def cluster_sequences(program, threshold, overlap, input_folder, output_folder):
def cluster_sequences(program, identity, overlap, input_folder, output_folder):
for file in tqdm(os.listdir(input_folder)):
if file.endswith('.fasta'):
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{threshold}', '-d', '0', '-aS', f'{overlap}'])
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
for file in os.listdir(output_folder):
if file.endswith('.clstr'):
@@ -52,13 +52,13 @@ def main():
os.mkdir(args.output)
if args.type == 'aa':
threshold = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
cluster_sequences('cd-hit', threshold, overlap, args.input, args.output)
cluster_sequences('cd-hit', identity, overlap, args.input, args.output)
elif args.type == 'dna':
threshold = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
cluster_sequences('cd-hit-est', threshold, overlap, args.input, args.output)
cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output)
else:
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
exit(1)