mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 03:30:25 +08:00
Update CountTaxonOccurence.py
adding option to look at only focal major clades
This commit is contained in:
parent
552e958952
commit
7aac410d85
@ -5,6 +5,7 @@ Motivation: Count the number of occurrences of each taxa in each OG in a post gu
|
||||
Dependencies: Biopython, os, sys
|
||||
Inputs: Directory of postguidance files
|
||||
Optional: use the --minor flag and include a file named focal_minors.txt in the same folder (do not put the file name on the command line). This file should be a comma-separated list of target minor clades (e.g. Am_tu, Sr_rh, Sr_ci)
|
||||
Optional: alternatively, use the --major flag and include a file named focal_majors.txt in the same folder (do not put the file name on the command line). This file should be a comma-separated list of target major clades (e.g. Am, Sr)
|
||||
Outputs: CSV file tallying all the counts of taxa in each OG file plus minor and major clade tallies
|
||||
Command line: python3 CountTaxonOccurence.py --input <dir of postguidance files> [--minor | --major]
|
||||
'''
|
||||
@ -21,8 +22,10 @@ def get_args():
|
||||
prog = 'Taxon occurrence counting script',
|
||||
description = "Updated June 9, 2024"
|
||||
)
|
||||
|
||||
parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to the folder containing the aligned/unaligned fasta files')
|
||||
parser.add_argument('--minor', action='store_true', help = 'Flag to use focal minor clades from focal_minors.txt')
|
||||
parser.add_argument('--major', action='store_true', help = 'Flag to use focal major clades from focal_majors.txt')
|
||||
args = parser.parse_args()
|
||||
|
||||
if(args.input.endswith('/')):
|
||||
@ -32,10 +35,10 @@ def get_args():
|
||||
print('\nThe input folder (--input) could not be found. Make sure you have given the correct path.\n')
|
||||
exit()
|
||||
|
||||
return args.input, args.minor
|
||||
return args.input, args.minor, args.major
|
||||
|
||||
|
||||
def count_tips(in_dir, use_focal_minors):
|
||||
def count_tips(in_dir, use_focal_minors, use_focal_majors):
|
||||
|
||||
focal_minors = []
|
||||
if use_focal_minors:
|
||||
@ -44,7 +47,17 @@ def count_tips(in_dir, use_focal_minors):
|
||||
focal_minors = f.read().strip().split(',')
|
||||
focal_minors = [minor.strip() for minor in focal_minors]
|
||||
except FileNotFoundError:
|
||||
print('A file called focal_minors.txt must be included in the folder with your script. This file should have a csv of target minor clades such as "Am_tu, Sr_ci, Sr_rh"')
|
||||
print('A file called focal_minors.txt must be included. This file should have a csv of target minor clades such as "Am_tu, Sr_ci, Sr_rh"')
|
||||
exit()
|
||||
|
||||
focal_majors = []
|
||||
if use_focal_majors:
|
||||
try:
|
||||
with open('focal_majors.txt', 'r') as f:
|
||||
focal_majors = f.read().strip().split(',')
|
||||
focal_majors = [major.strip() for major in focal_majors]
|
||||
except FileNotFoundError:
|
||||
print('A file called focal_majors.txt must be included. This file should have a list of target major clades such as "Am, Sr"')
|
||||
exit()
|
||||
|
||||
count_data = {}
|
||||
@ -65,9 +78,11 @@ def count_tips(in_dir, use_focal_minors):
|
||||
major_clade = tip[:2]
|
||||
minor_clade = tip[:5]
|
||||
|
||||
if use_focal_minors:
|
||||
if minor_clade not in focal_minors:
|
||||
continue
|
||||
if use_focal_minors and minor_clade not in focal_minors:
|
||||
continue
|
||||
|
||||
if use_focal_majors and major_clade not in focal_majors:
|
||||
continue
|
||||
|
||||
major_clades.add(major_clade)
|
||||
minor_clades.add(minor_clade)
|
||||
@ -80,6 +95,10 @@ def count_tips(in_dir, use_focal_minors):
|
||||
# Filter major and minor clades based on focal minors
|
||||
major_clades = sorted({minor[:2] for minor in focal_minors})
|
||||
minor_clades = sorted(focal_minors)
|
||||
elif use_focal_majors:
|
||||
# Filter major and minor clades based on focal majors
|
||||
major_clades = sorted(focal_majors)
|
||||
minor_clades = sorted({clade for clade in minor_clades if clade[:2] in focal_majors})
|
||||
else:
|
||||
major_clades = sorted(major_clades)
|
||||
minor_clades = sorted(minor_clades)
|
||||
@ -117,8 +136,8 @@ def count_tips(in_dir, use_focal_minors):
|
||||
|
||||
|
||||
def main():
|
||||
in_dir, use_focal_minors = get_args()
|
||||
count_tips(in_dir, use_focal_minors)
|
||||
in_dir, use_focal_minors, use_focal_majors = get_args()
|
||||
count_tips(in_dir, use_focal_minors, use_focal_majors)
|
||||
|
||||
|
||||
main()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user