For adding metadata to tree tip labels

This commit is contained in:
Godwin Ani 2023-11-06 10:15:53 -05:00 committed by GitHub
parent 43a7439a02
commit b8436d03dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -0,0 +1,39 @@
''' Author, Date : Godwin Ani, 15th - September - 2023.
Motivation : To make phylogenetic trees more presentable.
Intent : Rename the tip labels of phylogenetic trees.
Dependencies : Python3, Pandas
Inputs : A folder containing trees and a csv file(with headers).
The first column of the csv is the 10 digit code and other columns are the information to be added to the tip labels.
Outputs : A folder of trees with renamed tips.
Usage : python3 rename_tip_labels.py -i path_to_folder_of_trees -s path_to_spreadsheet
'''
import os, re, sys, argparse
import pandas as pd
# Abbreviations applied to every tree before the codes are expanded. Order
# matters: the underscore-suffixed forms must be handled before the bare ones,
# otherwise 'Len_' would become 'L_' instead of 'L'.
_ABBREVIATIONS = (
    ('Len_', 'L'),
    ('Cov_', 'Cv'),
    ('Contig_', 'Ct'),
    ('Len', 'L'),
    ('Cov', 'Cv'),
    ('Contig', 'Ct'),
)

# File extensions that identify a tree file inside the input folder.
_TREE_SUFFIXES = ('.tree', '.tre')


def _load_label_map(spreadsheet):
    '''Read the spreadsheet and return a dict mapping code -> new tip label.

    The new label is every non-empty cell of the row (the code included)
    joined with '_'. Cells are read verbatim as text, so an empty cell does
    not turn into the string 'nan' and an integer column containing blanks is
    not rewritten as floats ('2019' stays '2019', not '2019.0').

    Rows whose first column is empty are skipped. When a code occurs in more
    than one row, the first row wins.
    '''
    df = pd.read_csv(spreadsheet, dtype=str, keep_default_na=False)
    label_map = {}
    for row in df.itertuples(index=False, name=None):
        if not row:
            continue
        code = row[0]
        if not code or code in label_map:
            continue
        label_map[code] = '_'.join(field for field in row if field)
    return label_map


def _compile_code_pattern(label_map):
    '''Return a regex matching any code in label_map, or None if it is empty.

    Longer codes come first in the alternation so that a code which is a
    prefix of another code cannot shadow it.
    '''
    if not label_map:
        return None
    codes = sorted(label_map, key=len, reverse=True)
    return re.compile('|'.join(re.escape(code) for code in codes))


def _rename_tips(tree, label_map, pattern):
    '''Return the tree text with abbreviations applied and codes expanded.

    Codes are expanded in a single regex pass, so text that has just been
    inserted is never scanned again (a sequential str.replace per code would
    expand a code twice if it also appeared in an inserted label).
    '''
    # NOTE: the abbreviations run first, as they always have, so a code that
    # itself contains 'Len', 'Cov' or 'Contig' is altered here and will not be
    # found by the lookup below.
    for old, new in _ABBREVIATIONS:
        tree = tree.replace(old, new)
    if pattern is None:
        return tree
    return pattern.sub(lambda match: label_map[match.group(0)], tree)


def main(argv=None):
    '''Rename the tip labels of every tree in a folder.

    argv is the list of command-line arguments (defaults to sys.argv[1:]).
    Each '.tree' / '.tre' file found directly in the input folder is read,
    renamed and written under the same file name to '<input>/renamed'.
    '''
    parser = argparse.ArgumentParser(
        description='Add spreadsheet metadata to phylogenetic tree tip labels.')
    parser.add_argument('-i', '--input', required=True,
                        help='folder containing the trees')
    parser.add_argument('-s', '--spreadsheet', required=True,
                        help='csv file (with headers); first column is the code')
    args = parser.parse_args(argv)

    # Build the lookup once; it is the same for every tree.
    label_map = _load_label_map(args.spreadsheet)
    pattern = _compile_code_pattern(label_map)

    out_dir = os.path.join(args.input, 'renamed')
    os.makedirs(out_dir, exist_ok=True)

    for name in sorted(os.listdir(args.input)):
        source = os.path.join(args.input, name)
        if not name.endswith(_TREE_SUFFIXES) or not os.path.isfile(source):
            continue
        with open(source, 'r') as handle:
            tree = handle.read()
        with open(os.path.join(out_dir, name), 'w') as out:
            out.write(_rename_tips(tree, label_map, pattern))


if __name__ == '__main__':
    main()