# Provenance: recovered from git commit 4e1ceae6274eaf94dc2b8b97285165dc0ff1741b
# ("Delete Utilities/for_trees/ColorByClade_v2.0.py", Auden Cote-L'Heureux,
# Mon, 27 Nov 2023), which removed this 263-line script from the repository.

#Author, date: Auden Cote-L'Heureux, last updated Aug 1st 2023
#Motivation: Visualize placement of taxa by taxonomic group in trees
#Intent: Color tip labels in trees by taxonomic group
#Dependencies: Python3, ete3
#Inputs: A folder of trees
#Outputs: a folder of colored trees
#Example: python ColorByClade_v2.0.py -i /path/to/trees


import argparse
import os
import sys

try:
    import ete3
except ImportError:
    # Deferred: the missing dependency is reported when a tree is actually
    # parsed, so that e.g. --help still works without ete3 installed.
    ete3 = None


# Folder (relative to the working directory) that receives the colored trees.
_OUTPUT_DIR = 'ColoredTrees'

# File extensions (without the dot) recognised as input trees.
_TREE_EXTENSIONS = ('tree', 'tre', 'treefile', 'nex')

# Two-letter major-clade prefixes tried, in order, when looking for an
# outgroup. Every entry is a real tuple so that membership tests compare whole
# prefixes (a bare string such as ('Op') would turn `in` into a substring test).
_ROOTING_PRIORITY = (('Ba', 'Za'), ('Op',), ('Pl',), ('Am',), ('Ex',), ('Sr',))

# Colors used when no keyfile is given, keyed by leaf-name prefix.
_DEFAULT_COLORS = {
    'Ba': '[&!color=#000000]',
    'Za': '[&!color=#808080]',
    'Sr': '[&!color=#7b2516]',
    'Op': '[&!color=#12aaff]',
    'Pl': '[&!color=#006300]',
    'Ex': '[&!color=#ffa100]',
    'EE': '[&!color=#ff6288]',
    'Am': '[&!color=#aa00ff]',
}

# Individual "set ...;" statements of the FigTree settings block, in order.
_FIGTREE_SETTINGS = (
    'appearance.backgroundColorAttribute="Default"',
    'appearance.backgroundColour=#ffffff',
    'appearance.branchColorAttribute="User selection"',
    'appearance.branchColorGradient=false',
    'appearance.branchLineWidth=1.0',
    'appearance.branchMinLineWidth=0.0',
    'appearance.branchWidthAttribute="Fixed"',
    'appearance.foregroundColour=#000000',
    'appearance.hilightingGradient=false',
    'appearance.selectionColour=#2d3680',
    'branchLabels.colorAttribute="User selection"',
    'branchLabels.displayAttribute="Branch times"',
    'branchLabels.fontName="sansserif"',
    'branchLabels.fontSize=8',
    'branchLabels.fontStyle=0',
    'branchLabels.isShown=false',
    'branchLabels.significantDigits=4',
    'layout.expansion=0',
    'layout.layoutType="RECTILINEAR"',
    'layout.zoom=0',
    'legend.attribute=null',
    'legend.fontSize=10.0',
    'legend.isShown=false',
    'legend.significantDigits=4',
    'nodeBars.barWidth=4.0',
    'nodeBars.displayAttribute=null',
    'nodeBars.isShown=false',
    'nodeLabels.colorAttribute="User selection"',
    'nodeLabels.displayAttribute="Node ages"',
    'nodeLabels.fontName="sansserif"',
    'nodeLabels.fontSize=8',
    'nodeLabels.fontStyle=0',
    'nodeLabels.isShown=false',
    'nodeLabels.significantDigits=4',
    'nodeShape.colourAttribute="User selection"',
    'nodeShape.isShown=false',
    'nodeShape.minSize=10.0',
    'nodeShape.scaleType=Width',
    'nodeShape.shapeType=Circle',
    'nodeShape.size=4.0',
    'nodeShape.sizeAttribute="Fixed"',
    'polarLayout.alignTipLabels=false',
    'polarLayout.angularRange=0',
    'polarLayout.rootAngle=0',
    'polarLayout.rootLength=100',
    'polarLayout.showRoot=true',
    'radialLayout.spread=0.0',
    'rectilinearLayout.alignTipLabels=false',
    'rectilinearLayout.curvature=0',
    'rectilinearLayout.rootLength=100',
    'scale.offsetAge=0.0',
    'scale.rootAge=1.0',
    'scale.scaleFactor=1.0',
    'scale.scaleRoot=false',
    'scaleAxis.automaticScale=true',
    'scaleAxis.fontSize=8.0',
    'scaleAxis.isShown=false',
    'scaleAxis.lineWidth=1.0',
    'scaleAxis.majorTicks=1.0',
    'scaleAxis.origin=0.0',
    'scaleAxis.reverseAxis=false',
    'scaleAxis.showGrid=true',
    'scaleBar.automaticScale=true',
    'scaleBar.fontSize=10.0',
    'scaleBar.isShown=true',
    'scaleBar.lineWidth=1.0',
    'scaleBar.scaleRange=0.0',
    'tipLabels.colorAttribute="User selection"',
    'tipLabels.displayAttribute="Names"',
    'tipLabels.fontName="sansserif"',
    'tipLabels.fontSize=8',
    'tipLabels.fontStyle=0',
    'tipLabels.isShown=true',
    'tipLabels.significantDigits=4',
    'trees.order=false',
    'trees.orderType="increasing"',
    'trees.rooting=false',
    'trees.rootingType="User Selection"',
    'trees.transform=false',
    'trees.transformType="cladogram"',
)

#Needed for communicating with Figtree program
# One "set" statement per line between "begin figtree;" and "end;".
figtree_format = (
    'begin figtree;\n'
    + ''.join('\tset ' + setting + ';\n' for setting in _FIGTREE_SETTINGS)
    + 'end;'
)


def get_args():
    """Parse and return the command-line arguments (``input``, ``keyfile``)."""

    parser = argparse.ArgumentParser(
        prog = 'Tree-coloring script, Version 2.0',
        description = "Updated Aug 1st, 2023 by Auden Cote-L'Heureux"
    )

    parser.add_argument('-i', '--input', type = str, required = True, help = 'Path to a folder containing input trees (which must have the file extension .tre, .tree, .treefile, or .nex)')
    parser.add_argument('-k', '--keyfile', type = str, help = 'Path to a text file with two tab-separated columns; the first a set of keys and the second a color for each key in hex-code format. Any sequence starting with a particular key will be assigned the color corresponding to that key in this file.')

    return parser.parse_args()


#Function to extract newick string from either newick or nexus file
def get_newick(fname):
    """Return the Newick string found in ``fname``, or ``''`` if there is none.

    Only the last space-separated token of each line is inspected. A token
    that starts with ``(`` or ``tree1=`` is taken as the tree; single quotes
    and backslashes are stripped from it. When several lines qualify, the last
    one wins. The token keeps whatever line ending it had in the file.
    """

    newick = ''
    with open(fname) as handle:
        for line in handle:
            token = line.split(' ')[-1]
            if token.startswith('(') or token.startswith('tree1='):
                newick = token.split('tree1=')[-1].replace("'", '').replace('\\', '')

    return newick


def reroot(tree):
    """Reroot ``tree`` on the largest clean clade of the first suitable group.

    The groups in ``_ROOTING_PRIORITY`` are tried in order. For each, the best
    clade is the node holding the most distinct target leaves (leaf names are
    compared on their first 10 characters) while containing at most two leaves
    from other groups. The first group whose best clade contains more than
    three target leaves becomes the outgroup.

    Returns the same tree object (rerooted in place when a clade qualified).
    """

    #This nested function returns the largest clade of a given taxonomic group
    def get_best_clade(taxon):

        best_size = 0
        best_clade = []
        seen_leaves = set()

        #Traverse all nodes
        for node in tree.traverse('levelorder'):
            names = [leaf.name for leaf in node]

            #Skip nodes that are too small or overlap a node we've already accepted
            if len(names) < 3 or not seen_leaves.isdisjoint(names):
                continue

            #Leaves that belong to the taxonomic group vs. everything else
            target_leaves = {name[:10] for name in names if name[:2] in taxon}
            n_other = sum(1 for name in names if name[:2] not in taxon)

            #If this clade is better than any clade we've seen before, grab it
            if len(target_leaves) > best_size and n_other <= 2:
                best_clade = node
                best_size = len(target_leaves)
                seen_leaves.update(names)

        return best_clade

    #Get the biggest clade for each taxonomic group (stops once it finds one)
    for taxon in _ROOTING_PRIORITY:
        clade = get_best_clade(taxon)
        n_target = sum(1 for leaf in clade if leaf.name[:2] in taxon)

        # A tree cannot be rooted on its own root node (the ETE3 "Cannot set
        # myself as outgroup" error), so such a clade is passed over.
        if n_target > 3 and clade is not tree:
            tree.set_outgroup(clade)
            break

    return tree


def write_lines(o, newick, taxa_and_colors, tree_font_size):
    """Write a FigTree-flavoured NEXUS document to the open text handle ``o``.

    Args:
        o: writable text file object.
        newick: Newick string written inside the ``trees`` block.
        taxa_and_colors: taxon labels (optionally carrying a
            ``[&!color=...]`` annotation), one per ``taxlabels`` line.
        tree_font_size: replacement for every ``8`` on the ``.fontSize``
            lines of the FigTree settings block.
    """
    ntax = str(len(taxa_and_colors))
    tree_font_size = str(tree_font_size)

    #writes the header to the tree file
    o.write('#NEXUS\n')
    o.write('begin taxa;\n')
    o.write('\tdimensions ntax=' + ntax + ';\n')
    o.write('\ttaxlabels\n')

    #write out all taxa
    for taxon in taxa_and_colors:
        o.write('\t' + taxon + '\n')

    o.write(';\nend;\n\n')

    o.write('begin trees;\n')
    o.write('\ttree tree_1 = [&R]\n')
    o.write(newick)
    # Keep 'end;' on its own line even when the Newick string came from a
    # file whose last line had no trailing newline.
    if not newick.endswith('\n'):
        o.write('\n')
    o.write('end;\n\n')

    # Iterate over LINES of the template: iterating the string itself yields
    # single characters, so the '.fontSize' test could never match.
    for line in figtree_format.splitlines(keepends=True):
        if '.fontSize' in line:
            line = line.replace('8', tree_font_size)
        o.write(line)


def write_nexus(newick, leaf_colors, params):
    """Write a colored NEXUS tree to the path named in ``params``.

    Previously this function referenced names that were never defined
    (``out_path``, ``taxa_and_colors``, ``tree_font_size``) and so could not
    be called. ``params`` is now read as a mapping with:

        'out_path'        (required) destination file path
        'tree_font_size'  (optional, default 12) passed to ``write_lines``
    """

    out_path = params['out_path']
    tree_font_size = str(params.get('tree_font_size', 12))

    with open(out_path, 'w') as o:
        write_lines(o, newick, leaf_colors, tree_font_size)


def _load_colors(keyfile):
    """Return the prefix -> color mapping, from ``keyfile`` or the defaults.

    Keyfile lines that do not have exactly two tab-separated columns are
    ignored. Prints an error and exits with status 1 when the keyfile is
    missing, unreadable, or contains no usable line.
    """
    if keyfile is None:
        return dict(_DEFAULT_COLORS)

    if not os.path.isfile(keyfile):
        print('\nERROR: your input keyfile could not be found\n')
        sys.exit(1)

    colors = {}
    try:
        with open(keyfile) as handle:
            for line in handle:
                fields = line.split('\t')
                if len(fields) == 2:
                    colors[fields[0]] = fields[1].strip()
    except (OSError, UnicodeDecodeError):
        colors = {}

    if not colors:
        print('\nERROR: your keyfile is incorrectly formatted\n')
        sys.exit(1)

    return colors


def _label_with_color(name, colors):
    """Return ``name`` with the color annotation of its longest matching key."""
    matches = [key for key in colors if name.startswith(key)]
    if not matches:
        return name

    value = colors[max(matches, key = len)]
    # Values may already be complete FigTree annotations or bare color codes.
    if '[&!color=' in value:
        return name + value
    return name + '[&!color=' + value + ']'


def _colored_tree_path(file):
    """Return the output path ``ColoredTrees/<input stem>_Colored.tree``.

    Only the final extension is dropped, so ``a.tree.v2.nex`` becomes
    ``a.tree.v2_Colored.tree`` (splitting on the text '.tree' would have
    truncated it to ``a``) and ``x.tre`` becomes ``x_Colored.tree``.
    """
    stem = os.path.splitext(os.path.basename(file))[0]
    return os.path.join(_OUTPUT_DIR, stem + '_Colored.tree')


def color(file, args):
    """Color the tips of the tree in ``file`` and write it to ColoredTrees/.

    Args:
        file: path to a Newick or NEXUS tree file.
        args: parsed arguments; only ``args.keyfile`` is read here.
    """

    if ete3 is None:
        print('\nERROR: the ete3 package is required to read trees\n')
        sys.exit(1)

    colors = _load_colors(args.keyfile)

    newick = get_newick(file)
    if not newick:
        # One unreadable file should not abort the rest of the batch.
        print('\nWARNING: no tree found in ' + file + ', skipping it\n')
        return

    tree = ete3.Tree(newick)

    majs = list(dict.fromkeys(leaf.name[:2] for leaf in tree))

    #Only try to reroot trees with more than 2 major clades. This was added to fix the ETE3 "Cannot set myself as outgroup" error
    if len(majs) > 2:
        tree = reroot(tree)

    tree.ladderize()

    leaf_colors = [_label_with_color(leaf.name, colors) for leaf in tree]

    # NOTE(review): the ORIGINAL newick string is written below, so rerooting
    # and ladderizing only influence the order of the taxlabels block, not the
    # stored topology. Kept as-is; confirm whether the rerooted tree should be
    # serialized instead.
    os.makedirs(_OUTPUT_DIR, exist_ok = True)
    with open(_colored_tree_path(file), 'w') as o:
        write_lines(o, newick, leaf_colors, str(12))#change tree font size here (right now it is 12)


def main():
    """Color every tree file found in the input folder."""

    args = get_args()

    os.makedirs(_OUTPUT_DIR, exist_ok = True)

    for tree in sorted(os.listdir(args.input)):
        if tree.split('.')[-1] in _TREE_EXTENSIONS:
            color(os.path.join(args.input, tree), args)


if __name__ == '__main__':
    main()