diff --git a/Utilities/for_trees/CountTaxonOccurence.py b/Utilities/for_trees/CountTaxonOccurence.py index 9022d71..6d169ff 100644 --- a/Utilities/for_trees/CountTaxonOccurence.py +++ b/Utilities/for_trees/CountTaxonOccurence.py @@ -5,6 +5,7 @@ Motivation: Count the number of occurrences of each taxa in each OG in a post gu Dependencies: Bio python, os, sys Inputs: Directory of postguidance files Optional: use the --minor flag and include a file named focal_minors.txt in same folder (do not put file name in command line). This file should be csv of targets (Am_tu, Sr_rh, Sr_ci) +Optional: OR use the --major flag and include a file named focal_majors.txt in same folder (do not put file name in command line). This file should be csv of targets (Am, Sr) Outputs: CSV file tallying all the counts of taxa in each OG file plus minor and major clade tallies Command line: python3 CountTaxonOccurence_faster_minor.py --input --minor ''' @@ -21,8 +22,10 @@ def get_args(): prog = 'Taxon occurrence counting script', description = "Updated June 9, 2024" ) + parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to the folder containing the aligned/unaligned fasta files') parser.add_argument('--minor', action='store_true', help = 'Flag to use focal minor clades from focal_minors.txt') + parser.add_argument('--major', action='store_true', help = 'Flag to use focal major clades from focal_majors.txt') args = parser.parse_args() if(args.input.endswith('/')): @@ -32,10 +35,10 @@ def get_args(): print('\nThe input folder (--input) could not be found. Make sure you have given the correct path.\n') exit() - return args.input, args.minor + return args.input, args.minor, args.major -def count_tips(in_dir, use_focal_minors): +def count_tips(in_dir, use_focal_minors, use_focal_majors): focal_minors = [] if use_focal_minors: @@ -44,7 +47,17 @@ def count_tips(in_dir, use_focal_minors): focal_minors = f.read().strip().split(',') focal_minors = [minor.strip() for minor in focal_minors] except FileNotFoundError: - print('A file called focal_minors.txt must be included in the folder with your script. This file should have a csv of target minor clades such as "Am_tu, Sr_ci, Sr_rh"') + print('A file called focal_minors.txt must be included. This file should have a csv of target minor clades such as "Am_tu, Sr_ci, Sr_rh"') + exit() + + focal_majors = [] + if use_focal_majors: + try: + with open('focal_majors.txt', 'r') as f: + focal_majors = f.read().strip().split(',') + focal_majors = [major.strip() for major in focal_majors] + except FileNotFoundError: + print('A file called focal_majors.txt must be included. This file should have a list of target major clades such as "Am, Sr"') exit() count_data = {} @@ -65,9 +78,11 @@ def count_tips(in_dir, use_focal_minors): major_clade = tip[:2] minor_clade = tip[:5] - if use_focal_minors: - if minor_clade not in focal_minors: - continue + if use_focal_minors and minor_clade not in focal_minors: + continue + + if use_focal_majors and major_clade not in focal_majors: + continue major_clades.add(major_clade) minor_clades.add(minor_clade) @@ -80,6 +95,10 @@ def count_tips(in_dir, use_focal_minors): # Filter major and minor clades based on focal minors major_clades = sorted({minor[:2] for minor in focal_minors}) minor_clades = sorted(focal_minors) + elif use_focal_majors: + # Filter major and minor clades based on focal majors + major_clades = sorted(focal_majors) + minor_clades = sorted({clade for clade in minor_clades if clade[:2] in focal_majors}) else: major_clades = sorted(major_clades) minor_clades = sorted(minor_clades) @@ -117,8 +136,8 @@ def count_tips(in_dir, use_focal_minors): def main(): - in_dir, use_focal_minors = get_args() - count_tips(in_dir, use_focal_minors) + in_dir, use_focal_minors, use_focal_majors = get_args() + count_tips(in_dir, use_focal_minors, use_focal_majors) main()