mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 07:50:25 +08:00
Update Cluster_v2.0.py
This commit is contained in:
parent
f6c34517ae
commit
4ae1382815
@ -25,10 +25,10 @@ def input_validation(value, error_message):
|
|||||||
print(error_message)
|
print(error_message)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
def cluster_sequences(program, threshold, overlap, input_folder, output_folder):
|
def cluster_sequences(program, identity, overlap, input_folder, output_folder):
|
||||||
for file in tqdm(os.listdir(input_folder)):
|
for file in tqdm(os.listdir(input_folder)):
|
||||||
if file.endswith('.fasta'):
|
if file.endswith('.fasta'):
|
||||||
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{threshold}', '-d', '0', '-aS', f'{overlap}'])
|
subprocess.run([f'{program}', '-i', f'{input_folder}/{file}', '-o', f'{output_folder}/{file}', '-c', f'{identity}', '-d', '0', '-aS', f'{overlap}'])
|
||||||
|
|
||||||
for file in os.listdir(output_folder):
|
for file in os.listdir(output_folder):
|
||||||
if file.endswith('.clstr'):
|
if file.endswith('.clstr'):
|
||||||
@ -52,13 +52,13 @@ def main():
|
|||||||
os.mkdir(args.output)
|
os.mkdir(args.output)
|
||||||
|
|
||||||
if args.type == 'aa':
|
if args.type == 'aa':
|
||||||
threshold = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
|
identity = input_validation(args.identity, 'ERROR! Use format 0.## for Amino acids sequence identity threshold.')
|
||||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
|
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for Amino acids sequence alignment overlap value.')
|
||||||
cluster_sequences('cd-hit', threshold, overlap, args.input, args.output)
|
cluster_sequences('cd-hit', identity, overlap, args.input, args.output)
|
||||||
elif args.type == 'dna':
|
elif args.type == 'dna':
|
||||||
threshold = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
|
identity = input_validation(args.identity, 'ERROR! Use format 0.## for DNA sequence identity threshold.')
|
||||||
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
|
overlap = input_validation(args.overlap, 'ERROR! Use format 0.## for DNA sequence alignment overlap value.')
|
||||||
cluster_sequences('cd-hit-est', threshold, overlap, args.input, args.output)
|
cluster_sequences('cd-hit-est', identity, overlap, args.input, args.output)
|
||||||
else:
|
else:
|
||||||
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
|
print('Invalid sequence type. Choose "aa" for Amino Acids or "dna" for DNA.')
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user