mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 07:10:25 +08:00
Add files via upload
This commit is contained in:
parent
b2079b725d
commit
165a95fa9d
56
Utilities/for_fastas/sharedOGs.py
Normal file
56
Utilities/for_fastas/sharedOGs.py
Normal file
@ -0,0 +1,56 @@
|
||||
#Author, date: ACL June 8 2023
|
||||
#Motivation: Get record of OG presence across taxa from ReadyToGo files
|
||||
#Intent: Create a spreadsheet summarizing OG presence
|
||||
#Inputs: A folder of ReadyToGo files
|
||||
#Outputs: Spreadsheet (OGSharedness.csv, written to the current working directory)
|
||||
#Example: python sharedOGs.py ReadyToGo_AA
|
||||
|
||||
|
||||
import os, sys
|
||||
from Bio import SeqIO
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
from collections import Counter

# File extensions (text after the last '.') that are treated as FASTA input.
FASTA_EXTENSIONS = ('fasta', 'faa', 'fna', 'fa')

# Name of the spreadsheet written to the current working directory.
OUTPUT_FILE = 'OGSharedness.csv'

# Leading summary columns; per-major-clade and per-taxon columns follow them.
FIXED_COLUMNS = ['OG', 'Sequences', 'Species', 'Paralogness', 'MinorClades', 'MajorClades']


def collect_taxa_by_og(input_dir):
    """Return a dict mapping each OG to the list of taxa of its sequences.

    Every FASTA file in ``input_dir`` is parsed. The taxon is the first 10
    characters of the file name and the OG is the last 10 characters of each
    record ID. A taxon appears once per sequence, so duplicates in a list
    mean several sequences of that taxon fall in the same OG.
    """
    taxa_by_og = {}
    # Sort the listing so the row order of the output is reproducible;
    # os.listdir returns entries in arbitrary order.
    for file_name in tqdm(sorted(os.listdir(input_dir))):
        if file_name.split('.')[-1] not in FASTA_EXTENSIONS:
            continue

        tax = file_name[:10]
        for rec in SeqIO.parse(os.path.join(input_dir, file_name), 'fasta'):
            taxa_by_og.setdefault(rec.id[-10:], []).append(tax)

    return taxa_by_og


def clade_columns(taxa_by_og):
    """Return ``(all_maj, all_taxa)``: sorted major-clade codes and taxa.

    The major-clade code is the first two characters of a taxon name.
    """
    all_taxa = sorted({tax for taxa in taxa_by_og.values() for tax in taxa})
    all_maj = sorted({tax[:2] for tax in all_taxa})
    return all_maj, all_taxa


def og_row(og, taxa, all_maj, all_taxa):
    """Build one CSV row (without trailing newline) for a single OG.

    og       -- OG identifier written in the first column
    taxa     -- non-empty list of taxa, one entry per sequence in the OG
    all_maj  -- ordered major-clade codes (one column each)
    all_taxa -- ordered taxa (one column each)

    Columns: OG, sequence count, species count, paralogness (sequences per
    species), minor-clade count (distinct 5-character prefixes), major-clade
    count (distinct 2-character prefixes), then the number of species per
    major clade, then the number of sequences per taxon.
    """
    # Count once instead of calling list.count() for every taxon column.
    seqs_per_taxon = Counter(taxa)
    species_per_major = Counter(tax[:2] for tax in seqs_per_taxon)
    minor_clades = {tax[:5] for tax in seqs_per_taxon}

    fields = [
        og,
        str(len(taxa)),
        str(len(seqs_per_taxon)),
        str(len(taxa) / len(seqs_per_taxon)),
        str(len(minor_clades)),
        str(len(species_per_major)),
    ]
    # A Counter returns 0 for absent keys, which gives the ',0' columns.
    fields.extend(str(species_per_major[maj]) for maj in all_maj)
    fields.extend(str(seqs_per_taxon[tax]) for tax in all_taxa)
    return ','.join(fields)


def write_sharedness(taxa_by_og, out_path=OUTPUT_FILE):
    """Write the OG presence spreadsheet for ``taxa_by_og`` to ``out_path``."""
    all_maj, all_taxa = clade_columns(taxa_by_og)

    with open(out_path, 'w') as o:
        o.write(','.join(FIXED_COLUMNS + all_maj + all_taxa) + '\n')
        for og in tqdm(taxa_by_og):
            o.write(og_row(og, taxa_by_og[og], all_maj, all_taxa) + '\n')


def main(argv=None):
    """Run the script: read the input folder and write OGSharedness.csv.

    argv -- argument list without the program name; defaults to sys.argv[1:].
    Exits with a usage message if the folder argument is missing or is not
    a directory.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args or not os.path.isdir(args[0]):
        sys.exit('Usage: python sharedOGs.py <folder of ReadyToGo files>')

    input_dir = args[0]

    print('\nCreating a record of taxa per OG...')
    taxa_by_og = collect_taxa_by_og(input_dir)

    print('\nWriting output file...')
    write_sharedness(taxa_by_og)


if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user