'''
Author: Auden Cote-L'Heureux
Last updated: 06/14/23 by Elinor
Motivation: Count the number of occurrences of each taxon in each OG in a post guidance file
Dependencies: Bio python, collections, os, sys
Inputs: Directory of postguidance files (.fas)
Outputs: CSV file tallying all the counts of taxa in each OG file
Command line: python count_tips.py --input <folder of .fas files>
'''

import os
import sys
from collections import Counter


# Number of leading characters of each sequence ID used as the column key.
TAXON_CODE_LENGTH = 10

USAGE = '\nPlease input a folder of .fas files:\n\n\tpython count_tips.py --input <folder>\n'


def _exit_with_usage(message=None):
    '''Print an optional error message plus the usage text to stderr and exit with status 1.'''
    if message:
        print(message, file=sys.stderr)
    print(USAGE, file=sys.stderr)
    sys.exit(1)


def get_args():
    '''
    Read the input directory from the command line.

    The value following `--input` is used; if `--input` is absent, the value
    following `-i` is used instead.

    Returns:
        The input directory path, with a single trailing '/' removed (the
        filesystem root '/' is left untouched).

    Raises:
        SystemExit: with status 1 when neither flag is given, when the flag is
            the last argument (no value follows it), or when the value is not
            an existing directory.
    '''
    # '--input' is checked first so it wins when both flags are present.
    flag = next((f for f in ('--input', '-i') if f in sys.argv), None)
    if flag is None:
        _exit_with_usage()

    try:
        in_dir = sys.argv[sys.argv.index(flag) + 1]
    except IndexError:
        _exit_with_usage(
            '\nError: Something went wrong when parsing the arguments... '
            'maybe you forgot to give an input directory of .fas files?\n'
        )

    # Guard on length so that '/' is not reduced to the empty string.
    if len(in_dir) > 1 and in_dir.endswith('/'):
        in_dir = in_dir[:-1]

    if not os.path.isdir(in_dir):
        _exit_with_usage()

    return in_dir


def count_tips(in_dir, out_path='tip_count_data.csv'):
    '''
    Tally sequence-ID prefixes in every .fas file of a directory and write a CSV.

    Each file in `in_dir` whose name ends with '.fas' is parsed as FASTA. The
    first TAXON_CODE_LENGTH characters of every record ID are used as the key
    (presumably the taxon code; confirm against the naming scheme of the
    input files) and counted per file.

    The CSV has one column per distinct key (sorted), one row per file (sorted
    by file name), and 0 where a key does not occur in a file.

    Args:
        in_dir: Directory containing the .fas files.
        out_path: Where to write the CSV. Defaults to 'tip_count_data.csv' in
            the current working directory.
    '''
    # Imported here rather than at module level so that usage errors can be
    # reported, and this module imported, without Biopython being installed.
    from Bio import SeqIO

    count_data = {}
    # os.listdir returns entries in arbitrary order; sort for reproducible rows.
    for file_name in sorted(os.listdir(in_dir)):
        if not file_name.endswith('.fas'):
            continue
        records = SeqIO.parse(os.path.join(in_dir, file_name), 'fasta')
        count_data[file_name] = Counter(
            record.id[:TAXON_CODE_LENGTH].strip() for record in records
        )

    taxa = sorted({tax for counts in count_data.values() for tax in counts})

    # The first header cell is empty: it sits above the file-name column.
    lines = [',' + ','.join(taxa) + '\n']
    for file_name, counts in count_data.items():
        # Counter yields 0 for keys it has not seen, without inserting them.
        cells = [file_name] + [str(counts[tax]) for tax in taxa]
        lines.append(','.join(cells) + '\n')

    with open(out_path, 'w') as out:
        out.writelines(lines)


def main():
    '''Parse the command line and write the tip-count table.'''
    count_tips(get_args())


if __name__ == '__main__':
    main()