Shorten tree tip names

This commit is contained in:
Godwin Ani 2025-02-04 18:58:53 -05:00 committed by GitHub
parent 5284a71ce8
commit 7e9b90c79b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -0,0 +1,50 @@
''' Author, Date : Godwin Ani, 15th - September - 2023.
Motivation : To make phylogenetic trees more presentable.
Intent : Shorten the tip labels of phylogenetic trees.
Dependencies : Python3, ete3
Inputs : A folder containing trees
Outputs : A folder of trees with shortened tips.
Usage : python3 RenameTips_v1.0.py -i path_to_folder_of_trees
'''
import os, re, sys, argparse, string
import ete3
# Command-line interface: the single option names the folder that holds the trees.
parser = argparse.ArgumentParser()
# required=True: without it a missing -i leaves args.input as None and the
# path concatenation below dies with a TypeError instead of a usage message.
parser.add_argument('-i', '--input', required=True,
                    help='folder containing the tree files to rename')
args = parser.parse_args()
# Output goes to a "renamed" subfolder of the input folder; an already
# existing subfolder is reused (exist_ok).
os.makedirs(args.input + '/renamed', exist_ok = True)
def get_newick(fname):
    '''Return the Newick string found in the tree file *fname*.

    Each line is reduced to its last space-separated token. A token that
    starts with "(" or with "tree1=" is taken as the tree: everything up to
    and including "tree1=" is dropped, and all single quotes and backslashes
    are removed. When several lines match, the last one wins.

    The returned string keeps the line's trailing newline, if any. An empty
    string is returned when no line matches.
    '''
    newick = ''
    # The context manager closes the file handle once the scan is finished.
    with open(fname) as handle:
        for line in handle:
            token = line.split(' ')[-1]
            if token.startswith(('(', 'tree1=')):
                newick = token.split('tree1=')[-1].replace("'", '').replace('\\', '')
    return newick
def tree_formatting_wrapper(file):
    '''Shorten the tip labels of the tree in *file* and save the result.

    The Newick string is read with get_newick() and parsed with ete3. Only
    tips whose name starts with an ASCII letter are renamed:
      * everything from the first "_Len" onwards is dropped,
      * "Contig_" becomes "Ct",
      * "_XX_0" is removed.
    The tree is written (ete3 format=1) to
    <input folder>/renamed/<original file name>.tree
    '''
    parsed = ete3.Tree(get_newick(file))
    letters = tuple(string.ascii_letters)
    # Iterating over the tree object is expected to yield its tips (ete3).
    for tip in parsed:
        if not tip.name.startswith(letters):
            continue
        shortened = str(tip.name).split('_Len')[0]
        tip.name = shortened.replace('Contig_', 'Ct').replace('_XX_0', '')
    # The original file name, extension included, gets ".tree" appended.
    base_name = file.split('/')[-1]
    destination = args.input + '/renamed/' + base_name + '.tree'
    parsed.write(format=1, outfile=destination)
# Rename the tips of every entry in the input folder whose last
# dot-separated component is one of the recognised tree extensions.
tree_extensions = ('tree', 'tre', 'treefile', 'nex')
for tree in os.listdir(args.input):
    suffix = tree.rsplit('.', 1)[-1]
    if suffix not in tree_extensions:
        continue
    tree_formatting_wrapper(args.input + '/' + tree)