diff --git a/PTL2/Scripts/contamination.py b/PTL2/Scripts/contamination.py
index 324ce54..b1c70e1 100644
--- a/PTL2/Scripts/contamination.py
+++ b/PTL2/Scripts/contamination.py
@@ -1,55 +1,256 @@
 import os, sys, re
+from Bio import SeqIO
 import ete3
-from logger import Logger
+import guidance
+import trees
 
-
-def get_best_clade(params, tree):
-
-	if params.target_taxa_file != None:
-		try:
-			target_taxa_list = [line.strip() for line in open(PathtoFiles + '/' + target_taxa_list)]
-		except (FileNotFoundError, TypeError):
-			Logger.Error('The --target_taxa_file could not be found or was incorrectly formatted.')
-
-	if params.at_least_file != None:
-		try:
-			at_least_list = [line.strip() for line in open(PathtoFiles + '/' + at_least_file)]
-		except (FileNotFoundError, TypeError):
-			Logger.Error('The --at_least_file could not be found or was incorrectly formatted.')
-	else:
-		at_least_list = []
-
-
-	### FROM HERE BELOW IN THIS FUNCTION NEEDS REPLACING WITH ETE3
-		
-	forbidden_nodes = [node for node in nodes_to_exclude]
-	for node in nodes_to_exclude:
-		for num in tree.getNodeNumsAbove(node):
-			forbidden_nodes.append(tree.node(num))
 	
-	best_node = None
-	best_size = 0
-	for node in tree.iterNodesNoRoot():
-		if(node not in forbidden_nodes):
-			leaves = tree.getAllLeafNames(node)
-							
-			num = 0.0; dem = 0.0;
-		
-			non_minor = []
-			for leaf in leaves:
-				if(leaf[:2] != target_minor and leaf[:4] != target_minor):
-					num += 1.0;
-					non_minor.append(leaf[:10])
-					
-			if(target_taxa_list == 'na' or target_taxa_list == '' or target_taxa_list == 'NA'):
-				n_targets = len(list(dict.fromkeys([tip[:10] for tip in leaves if(tip[:2] == target_clade or tip[:3] == target_clade or tip[:4] == target_clade or tip[:5] == target_clade or tip[:7] == target_clade or tip[:8] == target_clade)])))
-			else:
-				n_targets = len(list(dict.fromkeys([tip[:10] for tip in leaves if((tip[:2] == target_clade or tip[:3] == target_clade or tip[:4] == target_clade or tip[:5] == target_clade or tip[:7] == target_clade or tip[:8] == target_clade) and (tip[:10] in target_taxa_list or tip[:8] in target_taxa_list))])))
-							
-			at_least_taxa = len(list(dict.fromkeys([leaf[:10] for leaf in tree.getAllLeafNames(node) if leaf[:10] in at_least_list])))
+def get_newick(fname):
+	#Returns the text of the last line of fname whose final space-separated token starts with '(' or 'tree1=' (quotes and backslashes removed), or '' if there is none
+	newick = ''
+	with open(fname) as tree_handle:
+		for line in tree_handle:
+			line = line.split(' ')[-1]
+			if line.startswith(('(', 'tree1=')):
+				newick = line.split('tree1=')[-1].replace("'", '').replace('\\', '')
+	return newick
+
+
+#This function reroots the tree on the largest Ba/Za clade. If there is no prokaryote clade,
+#it roots on the largest Op clade, then Pl, then Am, then Ex, then Sr. The (possibly rerooted)
+#tree is returned; if no group yields a usable clade it is returned unchanged.
+def reroot(tree):
+	#This nested function returns the largest clade of a given taxonomic group
+	#(taxon is a tuple of two-letter codes), or [] if no clade qualifies
+	def get_best_clade(taxon):
+
+		best_size = 0; best_clade = []; seen_leaves = set()
+		#Traverse all nodes, starting at the root and working towards the leaves
+		for node in tree.traverse('levelorder'):
+			leaf_names = [leaf.name for leaf in node]
+			#If the node is big enough and not subsumed by a node we've already accepted
+			if len(leaf_names) >= 3 and seen_leaves.isdisjoint(leaf_names):
+				#Unique taxa (first 10 characters) in the group, and number of sequences outside it
+				target_taxa = {name[:10] for name in leaf_names if name[:2] in taxon}
+				n_other = sum(1 for name in leaf_names if name[:2] not in taxon)
+
+				#If this clade is better than any clade we've seen before, grab it
+				#(at most 2 sequences from outside the group are tolerated)
+				if len(target_taxa) > best_size and n_other <= 2:
+					best_clade = node
+					best_size = len(target_taxa)
+					seen_leaves.update(leaf_names)
+
+		return best_clade
+
+	#Get the biggest clade for each taxonomic group (stops once it finds one).
+	#Every entry is a real tuple: a bare ('Op') is just the string 'Op', which turns
+	#"in taxon" into a substring test that also matches '', 'O' and 'p'
+	for taxon in [('Ba', 'Za'), ('Op',), ('Pl',), ('Am',), ('Ex',), ('Sr',)]:
+		clade = get_best_clade(taxon)
+		#Only root on a clade holding more than 3 sequences of the group
+		if len([leaf for leaf in clade if leaf.name[:2] in taxon]) > 3:
+			tree.set_outgroup(clade)
+			break
+
+	return tree
+	
+	
+#Helper for get_subtrees: turns a "target" or "required_taxa" argument into a clean list
+#of complete/partial taxon codes. The argument may be None, one string or a list of
+#strings; a string naming an existing file is read as one code per line, any other string
+#is split on commas (e.g. 'Sr_ci_S,Am_t'). Empty codes are dropped.
+def _read_taxon_codes(value):
+
+	if value is None:
+		return []
+
+	#A bare string is wrapped so that it is not iterated character by character
+	items = [value] if isinstance(value, str) else list(value)
+
+	codes = []
+	for item in items:
+		if os.path.isfile(item):
+			with open(item) as code_file:
+				codes.extend(line.strip() for line in code_file)
+		else:
+			codes.extend(code.strip() for code in item.split(','))
+
+	return [code for code in codes if code != '']
+
+
+#This function returns the names of all sequences that sit in a selected clade of the tree
+#stored in "file". Nodes are scanned from the root towards the leaves, and a clade is not
+#considered if it shares a leaf with one already selected. It reads args.target,
+#args.required_taxa, args.min_target_presence, args.n_at_least and args.contaminants.
+def get_subtrees(args, file):
+
+	tree = ete3.Tree(get_newick(file))
+
+	try:
+		tree = reroot(tree)
+	except Exception:
+		print('\nUnable to re-root the tree ' + file + ' (maybe it had only 1 major clade, or an inconvenient polytomy). Skipping this step and continuing to try to grab robust clades from the tree.\n')
+
+	#Getting clean lists of all "at least" taxa and all target taxa (the "at least" taxa
+	#always count as targets); tuples because str.startswith accepts a tuple of prefixes
+	at_least_codes = _read_taxon_codes(args.required_taxa)
+	target_codes = list(dict.fromkeys(_read_taxon_codes(args.target) + at_least_codes))
+	target_prefixes = tuple(target_codes); at_least_prefixes = tuple(at_least_codes)
+
+	#Creating a record of selected subtrees, and all of the leaves in those subtrees
+	selected_nodes = []; seen_leaves = set()
+
+	#Iterating through all nodes in tree, starting at "root" then working towards leaves
+	for node in tree.traverse('levelorder'):
+		leaves = [leaf.name for leaf in node]
+
+		#If a node is large enough and is not contained in an already selected clade
+		if len(leaves) >= args.min_target_presence and seen_leaves.isdisjoint(leaves):
+
+			#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade: every child must hold at least one target sequence
+			#(tested per child, so a child matching several codes cannot stand in for a child that matches none)
+			if all(any(leaf.name.startswith(target_prefixes) for leaf in child) for child in node.children):
+
+				#Creating a record of all leaves belonging to the target/"at least" group of taxa, and any other leaves are contaminants.
+				#Each leaf is tested against both code lists independently, so an "at least" code that extends a target code is still counted
+				target_leaves = {name[:10] for name in leaves if name.startswith(target_prefixes)}
+				at_least_leaves = {name[:10] for name in leaves if name.startswith(at_least_prefixes)}
+				n_contaminants = sum(1 for name in leaves if not name.startswith(target_prefixes))
+
+				#A contaminants value below 1 is compared as a proportion of the target taxa; any value is also a strict upper bound on the count
+				few_contaminants = (args.contaminants < 1 and n_contaminants < args.contaminants * len(target_leaves)) or n_contaminants < args.contaminants
+				enough_targets = len(target_leaves) >= args.min_target_presence and len(at_least_leaves) >= args.n_at_least
+
+				#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
+				if enough_targets and few_contaminants:
+					selected_nodes.append(node)
+					seen_leaves.update(leaves)
+
+	#Collect the names of every sequence in a selected subtree
+	seqs2keep = [leaf.name for node in selected_nodes for leaf in node]
+	return seqs2keep
+
+
+#This function returns the names of the sequences to keep from the tree in "file": every
+#sequence except those whose sister sequences ALL start with a contaminant code listed for
+#it. contam_per_tax maps a (partial) taxon code to the list of (partial) codes of its
+#contaminants. NOTE(review): args is accepted but not read by this function.
+def get_sisters(args, file, contam_per_tax):
+
+	seqs2remove = set()
+
+	#Read the tree using ete3 and reroot it using the above function
+	tree = ete3.Tree(get_newick(file))
+
+	try:
+		tree = reroot(tree)
+	except Exception:
+		print('\nUnable to re-root the tree ' + file + ' (maybe it had only 1 major clade, or an inconvenient polytomy). Skipping this step and continuing to try to grab robust clades from the tree.\n')
+
+	#For each sequence
+	for leaf in tree:
+
+		#This loop will keep moving towards the root of the tree until it finds a node that
+		#has leaves from a cell other than the one for which we are looking for sisters
+		#(or until it reaches the root, when the whole tree holds a single cell)
+		parent_node = leaf; sister_taxa = {leaf.name[:10]}
+		while len(sister_taxa) == 1 and parent_node.up is not None:
+			parent_node = parent_node.up
+			sister_taxa.update(l2.name[:10] for l2 in parent_node)
+
+		#Create a record of the names of the sister sequences (names, not nodes, so they can be tested with startswith)
+		sisters = list(dict.fromkeys(sister.name for sister in parent_node if sister.name[:10] != leaf.name[:10]))
+
+		#Codes that count as contaminants of this sequence (startswith must be called with parentheses, not subscripted)
+		bad_sisters = tuple(dict.fromkeys(contam for tax in contam_per_tax if leaf.name.startswith(tax) for contam in contam_per_tax[tax]))
+
+		#Remove the sequence only if it has sisters and every one of them is a contaminant
+		if sisters and all(sister.startswith(bad_sisters) for sister in sisters):
+			seqs2remove.add(leaf.name)
+
+	#Everything that was not flagged above is kept
+	return [leaf.name for leaf in tree if leaf.name not in seqs2remove]
+
+
+
+#This function writes the Pre-Guidance fasta file of one gene family, keeping only the sequences in seqs2keep, and returns the names of the sequences it left out.
+#The family is the one file in seqs_per_og (file name -> {sequence name : sequence}) whose name starts with the tree file's name minus its extension.
+#Returns [] without writing anything if tree_file is not given, or if it does not match exactly one sequence file.
+def write_new_preguidance(params, seqs2keep, seqs_per_og, tree_file = None):
+	if tree_file is None:
+		return []
+
+	prefix = '.'.join(os.path.basename(tree_file).split('.')[:-1])
+	seq_file = [file for file in seqs_per_og if file.startswith(prefix)]
+	if len(seq_file) == 0:
+		seq_file = [file for file in seqs_per_og if file.startswith(prefix.split('.')[0])]
+
+	if len(seq_file) == 0:
+		print('\nNo sequence file found for tree file ' + tree_file + '. Skipping this gene family.\n')
+		return []
+	if len(seq_file) > 1:
+		print('\nMore than one sequence file found matching the tree file ' + tree_file + '. Please make your file names more unique: there should be one sequence file for every tree file, with a matching unique prefix (everything before the first "."). Skipping this gene family.\n')
+		return []
+
+	#Exactly one match: unwrap it, since the file name itself (not a list) is the dictionary key
+	seq_file = seq_file[0]; keep = set(seqs2keep)
+	with open(params.output + '/Pre-Guidance/' + seq_file, 'w') as o:
+		for rec in seqs_per_og[seq_file]:
+			if rec in keep:
+				o.write('>' + rec + '\n' + seqs_per_og[seq_file][rec] + '\n\n')
+
+	return [seq for seq in seqs_per_og[seq_file] if seq not in keep]
+
+
+#This function runs the contamination loop params.nloops times. Each round it filters every gene family using its tree (get_subtrees if params.contamination_loop
+#is 'clade', get_sisters if it is 'seq'), writes the kept sequences to a new Pre-Guidance folder, renames the old Pre-Guidance, Trees and Guidance folders with the
+#loop number as a suffix, and re-runs guidance and tree building. All removed sequence names are written to SequencesRemoved_ContaminationLoop.txt at the end.
+def run(params):
+	seqs_removed = []; completed_ogs = []
+	tree_dir = params.output + '/Trees'; pre_dir = params.output + '/Pre-Guidance'
+	#Read the sister rules once, up front (taxon code, then tab-separated contaminant codes). NOTE(review): assumes params.sister_rules is a file path or an iterable of lines - confirm
+	rules = params.sister_rules if params.contamination_loop == 'seq' else []
+	if isinstance(rules, str):
+		with open(rules) as rule_file:
+			rules = rule_file.readlines()
+	contam_per_tax = { line.strip().split('\t')[0] : line.strip().split('\t')[1:] for line in rules if line.strip() != '' }
+
+	for loop in range(params.nloops):
+		#Sequence files are opened by full path (a bare os.listdir name only resolves from inside seq_dir); gaps are stripped unless the start is 'raw'
+		#NOTE(review): raw runs read from output/Output/Pre-Guidance but archive output/Pre-Guidance - confirm which is intended
+		raw = params.start == 'raw'
+		seq_dir = params.output + '/Output/Pre-Guidance' if raw else params.data
+		seqs_per_og = { file : { rec.id : (str(rec.seq) if raw else str(rec.seq).replace('-', '')) for rec in SeqIO.parse(seq_dir + '/' + file, 'fasta') } for file in os.listdir(seq_dir) if file.split('.')[-1] in ('fasta', 'fas', 'faa') }
+
+		#os.rename replaces a shell "mv" built by string concatenation, which broke on paths containing spaces or shell characters
+		if (loop > 0 or raw) and os.path.isdir(pre_dir):
+			os.rename(pre_dir, pre_dir + '_' + str(loop))
+		os.mkdir(pre_dir)
+
+		if params.contamination_loop in ('clade', 'seq'):
+			#os.listdir gives the tree files; iterating over the path string itself would give single characters
+			for tree_file in sorted(os.listdir(tree_dir)):
+				if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex') and tree_file not in completed_ogs:
+					tree_path = tree_dir + '/' + tree_file
+					seqs2keep = get_subtrees(params, tree_path) if params.contamination_loop == 'clade' else get_sisters(params, tree_path, contam_per_tax)
+					seqs_removed_from_og = write_new_preguidance(params, seqs2keep, seqs_per_og, tree_file = tree_file)
+					#A gene family that lost no sequences is finished and is skipped in later rounds
+					seqs_removed += seqs_removed_from_og
+					if len(seqs_removed_from_og) == 0:
+						completed_ogs.append(tree_file)
+
+		for folder in (tree_dir, params.output + '/Guidance'):
+			if os.path.isdir(folder):
+				os.rename(folder, folder + '_' + str(loop))
+			os.mkdir(folder)
+
+		params.start = 'unaligned'
+		params.end = 'trees'
+		guidance.run(params)
+		trees.run(params)
+
+	with open('SequencesRemoved_ContaminationLoop.txt', 'w') as o:
+		for seq in seqs_removed:
+			o.write(seq + '\n')
+
 
-			if(num <= cont_num_contams and n_targets > best_size and n_targets >= min_presence and at_least_taxa >= num_at_least):
-				best_node = node
-				best_size = n_targets
-			
-	return best_node
\ No newline at end of file