Add files via upload

2026-02-11 01:00:26 +08:00 · 2024-02-07 11:23:49 -05:00 · 2024-02-07 11:23:49 -05:00 · 9844842aed
commit 9844842aed
parent 00ee7e44b3
1 changed files with 278 additions and 113 deletions
--- a/PTL2/Scripts/contamination.py
+++ b/PTL2/Scripts/contamination.py
@ -1,8 +1,12 @@
+#Updated by ACL on 1/16 to fix subsisters
+
+
 import os, sys, re
 from Bio import SeqIO
 import ete3
 import guidance
 import trees
+from statistics import mean

 	
 def get_newick(fname):
@ -67,74 +71,114 @@ def get_subtrees(args, file):
 	except:
 		print('\nUnable to re-root the tree ' + file + ' (maybe it had only 1 major clade, or an inconvenient polytomy). Skipping this step and continuing to try to grab robust clades from the tree.\n')					

-	#Getting a clean list of all target taxa
-	if os.path.isfile(args.target):
-		try:
-			target_codes = [l.strip() for l in open(args.target).readlines() if l.strip() != '']
-		except AttributeError:
-			print('\n\nError: invalid "target" argument. This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n\n')
-	else:
-		#make sure that this is how nargs works
-		target_codes = [code.strip() for code in args.target if code.strip() != '']
+	exceptions = []
+	if args.clade_grabbing_exceptions != None:
+		if os.path.isfile(args.clade_grabbing_exceptions):
+			exceptions = [line.strip() for line in open(args.clade_grabbing_exceptions)]
+		else:
+			print('\nError: it looks like you tried to input a clade grabbing exceptions file, but it could not be found.\n')
+			exit()

-	#Getting a clean list of all "at least" taxa
-	if os.path.isfile(args.required_taxa):
-		try:
-			at_least_codes = [l.strip() for l in open(args.required_taxa).readlines() if l.strip() != '']
-		except AttributeError:
-			print('\n\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S OR Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n\n')
-	else:
-		#make sure that this is how nargs works
-		at_least_codes = [code.strip() for code in args.required_taxa if code.strip() != '']
+	rules_per_clade = []
+	if args.clade_grabbing_rules_file != None:
+		if os.path.isfile(args.clade_grabbing_rules_file):
+			lines = [line.strip().split('\t') for line in open(args.clade_grabbing_rules_file) if len(line.strip().split('\t')) == 5]

-	target_codes = list(dict.fromkeys(target_codes + at_least_codes))
+			for line in lines:
+				if line[4].lower() == 'na':
+					rules_per_clade.append({ 'target_taxa' : line[0], 'num_contams' : int(line[1]), 'min_target_presence' : int(line[2]), 'required_taxa' : line[3], 'required_taxa_num' : 0 })
+				else:
+					rules_per_clade.append({ 'target_taxa' : line[0], 'num_contams' : int(line[1]), 'min_target_presence' : int(line[2]), 'required_taxa' : line[3], 'required_taxa_num' : int(line[4]) })
+
+		else:
+			print('\nError: it looks like you tried to input a clade grabbing rules file, but it could not be found.\n')
+			exit()
+	else:
+		rules_per_clade.append({ 'target_taxa' : args.target_taxa, 'num_contams' : args.num_contams, 'min_target_presence' : args.min_target_presence, 'required_taxa' : args.required_taxa, 'required_taxa_num' : args.required_taxa_num })
+		
+	for clade in rules_per_clade:
+		if os.path.isfile(clade['target_taxa']):
+			try:
+				clade['target_taxa'] = [l.strip() for l in open(clade['target_taxa']).readlines() if l.strip() != '']
+			except AttributeError:
+				print('\nError: invalid "target_taxa" input (' + clade['target_taxa'] + '). This must be a comma-separated list of any number of digits/characters to describe focal taxa (e.g. Sr_ci_S,Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes. All sequences containing the complete/partial code will be identified as belonging to target taxa.\n')
+				exit()
+		else:
+			clade['target_taxa'] = [code.strip() for code in clade['target_taxa'].split(',') if code.strip() != '']
+
+		if clade['required_taxa'] != None:
+			if os.path.isfile(clade['required_taxa']):
+				try:
+					clade['required_taxa'] = [l.strip() for l in open(clade['required_taxa']).readlines() if l.strip() != '']
+				except AttributeError:
+					print('\nError: invalid "required_taxa" argument. This must be a comma-separated list of any number of digits/characters (e.g. Sr_ci_S,Am_t), or a file with the extension .txt containing a list of complete or partial taxon codes, to describe taxa that MUST be present in a clade for it to be selected (e.g. you may want at least one whole genome).\n')
+			else:
+				clade['required_taxa'] = [code.strip() for code in clade['required_taxa'].split(',') if code.strip() != '' and code.strip().lower() != 'na']
+
+			clade['target_taxa'] = clade['target_taxa'] + clade['required_taxa']
+		else:
+			clade['required_taxa'] = []

 	#Creating a record of selected subtrees, and all of the leaves in those subtrees
-	selected_nodes = []; seen_leaves = []
+	selected_leaves = []

-	#Iterating through all nodes in tree, starting at "root" then working towards leaves
-	for node in tree.traverse('levelorder'):
-		#If a node is large enough and is not contained in an already selected clade
-		if len(node) >= args.min_target_presence and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
-			leaves = [leaf.name for leaf in node]
+	for clade in rules_per_clade:
+		seen_leaves = []

-			#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade with 1 fewer than the max number of contaminants
-			children_keep = 0
-			for child in node.children:
-				for code in target_codes:
-					for leaf in child:
-						if leaf.name.startswith(code):
-							children_keep += 1
+		#Iterating through all nodes in tree, starting at "root" then working towards leaves
+		for node in tree.traverse('levelorder'):
+			#If a node is large enough and is not contained in an already selected clade
+			if len(node) >= clade['min_target_presence'] and len(list(set(seen_leaves) & set([leaf.name for leaf in node]))) == 0:
+				leaves = [leaf.name for leaf in node]
+
+				#Accounting for cases where e.g. one child is a contaminant, and the other child is a good clade with 1 fewer than the max number of contaminants
+				children_keep = 0
+				for child in node.children:
+					for code in clade['target_taxa']:
+						taken = False
+						for leaf in child:
+							if leaf.name.startswith(code):
+								children_keep += 1
+								taken = True
+								break
+
+						if taken:
 							break

-			if children_keep == len(node.children):
-				#Creating a record of all leaves belonging to the target/"at least" group of taxa, and any other leaves are contaminants
-				target_leaves = set(); at_least_leaves = set()
-				for code in target_codes:
-					for leaf in leaves[::-1]:
-						if leaf.startswith(code):
-							target_leaves.add(leaf[:10])
+				if children_keep == len(node.children):

-							if code in at_least_codes:
-								at_least_leaves.add(leaf[:10])
+					#Creating a record of all leaves belonging to the target/"at least" group of taxa, and any other leaves are contaminants
+					target_leaves = set(); at_least_leaves = set(); target_leaves_full_names = []
+					for code in clade['target_taxa']:
+						for leaf in leaves[::-1]:
+							if leaf.startswith(code):
+								target_leaves.add(leaf[:10])
+								target_leaves_full_names.append(leaf)

-							leaves.remove(leaf)
+								for req in clade['required_taxa']:
+									if leaf.startswith(req):
+										at_least_leaves.add(leaf[:10])
+										break

-				#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
-				if len(target_leaves) >= args.min_target_presence and len(at_least_leaves) >= args.n_at_least and ((args.contaminants < 1 and len(leaves) < args.contaminants * len(target_leaves)) or len(leaves) < args.contaminants):
-					selected_nodes.append(node)
-					seen_leaves.extend([leaf.name for leaf in node])
+								leaves.remove(leaf)

-	#Write the subtrees to output .tre files
-	seqs2keep = [leaf.name for node in selected_nodes for leaf in node]
+					#Grab a clade as a subtree if 1) it has enough target taxa; 2) it has enough "at least" taxa; 3) it does not have too many contaminants
+					if len(target_leaves) >= clade['min_target_presence'] and len(at_least_leaves) >= clade['required_taxa_num'] and ((clade['num_contams'] < 1 and len(leaves) <= clade['num_contams'] * len(target_leaves)) or len(leaves) <= clade['num_contams']):
+						selected_leaves.extend(target_leaves_full_names)
+
+						seen_leaves.extend([leaf.name for leaf in node])
+
+	all_clades = [clade for group in rules_per_clade for clade in group['target_taxa']]
+
+	seqs2keep = [leaf.name for leaf in tree if leaf.name in selected_leaves or not any([leaf.name.startswith(clade) for clade in all_clades]) or any([leaf.name.startswith(ex) for ex in exceptions])]

 	return seqs2keep


-def get_sisters(args, file, contam_per_tax):
-				
+def get_sisters(args, file, sister_contam_per_tax, subsister_contam_per_tax):
+
 	seqs2remove = []
-	
+
 	#Read the tree using ete3 and reroot it using the above function
 	newick = get_newick(file)
 	tree = ete3.Tree(newick)
@ -144,54 +188,126 @@ def get_sisters(args, file, contam_per_tax):
 	except:
 		print('\nUnable to re-root the tree ' + file + ' (maybe it had only 1 major clade, or an inconvenient polytomy). Skipping this step and continuing to try to grab robust clades from the tree.\n')

+	mean_bl = mean([leaf.dist for leaf in tree])
+
+	all_taxa_in_tree = [leaf.name[:10] for leaf in tree]
+
+	coconts = { }
+	if args.cocontaminants != None:
+		if os.path.isfile(args.cocontaminants):
+			for line in open(args.cocontaminants):
+				line = line.strip().split('\t')
+				if len(line) == 2:
+					if line[0] in all_taxa_in_tree:
+						coconts.update({ line[0] : line[1] })
+		else:
+			print('\nERROR: It looks like you tried to input a co-contaminants file to the contamination loop, but the file could not be found.\n')
+			exit()
+
+	for tax in all_taxa_in_tree:
+		if tax not in coconts:
+			coconts.update({ tax : tax })
+
 	#For each sequence
 	for leaf in tree:
+		bad_sisters = { contam[0] : contam[1] for tax in sister_contam_per_tax for contam in sister_contam_per_tax[tax] if leaf.name.startswith(tax) }
+		bad_subsisters = [contam for tax in subsister_contam_per_tax for contam in subsister_contam_per_tax[tax] if leaf.name.startswith(tax)]

-		#This loop will keep moving towards the root of the tree until it finds a node that
-		#has leaves from a cell other than the one for which we are looking for sisters
-		parent_node = leaf; sister_taxa = {leaf.name[:10]}
-		while len(sister_taxa) == 1:
-			parent_node = parent_node.up
-			for l2 in parent_node:
-				sister_taxa.add(l2.name[:10])
+		if len(bad_sisters) > 0 or len(bad_subsisters) > 0:
+			#This loop will keep moving towards the root of the tree until it finds a node that
+			#has leaves from a cell other than the one for which we are looking for sisters
+			parent_node = leaf; seen_taxa = {coconts[leaf.name[:10]]}; sisters = []
+			while len(seen_taxa) == 1:
+				parent_node = parent_node.up

-		#Create a record of the sister sequences
-		sisters = list(dict.fromkeys([sister for sister in parent_node if sister.name[:10] != leaf.name[:10]]))
+				for l2 in parent_node:
+					seen_taxa.add(coconts[l2.name[:10]])
+					if coconts[l2.name[:10]] != coconts[leaf.name[:10]]:
+						sisters.append(l2.name[:10])

-		bad_sisters = list(dict.fromkeys([contam for tax in contam_per_tax for contam in contam_per_tax[tax] if leaf.name.startswith[tax]]))
+			#Create a record of the subsister sequences
+			sub_sisters = []
+			if args.subsister_rules != None:
+				new_parent_node = parent_node.up

-		sisters_removable = []
-		for contam in bad_sisters:
-			for sister in sisters:
-				if sister.startswith(contam) and sister not in sisters_removable:
-					sisters_removable.append(sister)
-		
-		if len(sisters_removable) == len(sisters):
-			seqs2remove.append(leaf.name)
+				for l2 in new_parent_node:
+					if l2.name not in [l.name for l in parent_node]:
+						sub_sisters.append(l2.name)
+
+			#Create a record of the sister sequences
+			sisters = list(dict.fromkeys(sisters))
+
+			sisters_removable = []; bls = []
+			for contam in bad_sisters:
+				for sister in sisters:
+					if sister.startswith(contam) and sister not in sisters_removable:
+						sisters_removable.append(sister)
+						bls.append(bad_sisters[contam])
+
+			subsisters_removable = []
+			for contam in bad_subsisters:
+				for sub_sister in sub_sisters:
+					if sub_sister.startswith(contam) and sub_sister not in subsisters_removable:
+						subsisters_removable.append(sub_sister)
+
+			if len(bls) > 0:
+				bl_rule_min = min(bls)
+			else:
+				bl_rule_min = 0
+
+			if len(sisters_removable) == len(sisters) and leaf.dist <= bl_rule_min*mean_bl and len(sisters_removable) > 0:
+				seqs2remove.append(leaf.name)
+			elif len(subsisters_removable) == len(sub_sisters) and len(subsisters_removable) > 0:
+				seqs2remove.append(leaf.name)

 	return [leaf.name for leaf in tree if leaf.name not in seqs2remove]


+def write_new_preguidance(params, seqs2keep, seqs_per_og, tree_file):

-def write_new_preguidance(params, seqs2keep, seqs_per_og):
+	if params.cl_exclude_taxa != None:
+		try:
+			exclude_taxa = list(dict.fromkeys([line.strip() for line in open(params.cl_exclude_taxa)]))
+		except (FileNotFoundError, TypeError) as e:
+			print('\nERROR: Unable to read the file listing taxa to exclude in the first iteration of the contamination loop (--cl_exclude_taxa). Please make sure that the path is correct and that the file is formatted correctly.\n\n' + str(e) + '\n')
+			exit()
+	else:
+		exclude_taxa = []

-	prefix = '.'.join(tree_file.split('.')[:-1])
+	prefix = tree_file.split('.')[0]
 	seq_file = [file for file in seqs_per_og if file.startswith(prefix)]
-	if len(seq_file) == 0:
-		seq_file = [file for file in seqs_per_og if file.startswith(prefix.split('.')[0])]

-		if len(seq_file) == 0:
-			print('\nNo sequence file found for tree file ' + tree_file + '. Skipping this gene family.\n')
-		elif len(seq_file) > 1:
-			print('\nMore than one sequence file found matching the tree file ' + tree_file + '. Please make your file names more unique: there should be one sequence file for every tree file, with a matching unique prefix (everything before the first "."). Skipping this gene family.\n')
-		
-	if len(seq_file) == 1:
-		with open(params.output + '/Pre-Guidance/' + seq_file, 'w') as o:
-			for rec in seqs_per_og[seq_file]:
-				if rec in seqs2keep:
-					o.write('>' + rec + '\n' + seqs_per_og[seq_file][rec] + '\n\n')
-na
-		seqs_removed_from_og = [seq for seq in seqs_per_og[seq_file] if seq not in seqs2keep]
+	if len(seq_file) == 0:
+		print('\nNo sequence file found for tree file ' + tree_file + '. Skipping this gene family.\n')
+		return None, []
+	elif len(seq_file) > 1:
+		print('\nMore than one sequence file found matching the tree file ' + tree_file + '. Please make your file names more unique: there should be one sequence file for every tree file, with a matching unique prefix (everything before the first "."). Skipping this gene family.\n')
+		return None, []
+	elif len(seq_file) == 1:
+		with open(params.output + '/Output/Pre-Guidance/' + seq_file[0], 'w') as o:
+			for rec in seqs_per_og[seq_file[0]]:
+				if rec in seqs2keep and rec[:10] not in exclude_taxa and rec[:2] not in exclude_taxa and rec[:5] not in exclude_taxa:
+					o.write('>' + rec + '\n' + seqs_per_og[seq_file[0]][rec] + '\n\n')
+
+		seqs_removed_from_og = [seq for seq in seqs_per_og[seq_file[0]] if seq not in seqs2keep]
+
+		return seq_file[0], seqs_removed_from_og
+
+
+def cl_mafft(params):
+
+	for file in os.listdir(params.output + '/Output/Pre-Guidance'):
+		if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
+			os.system('mafft ' + params.output + '/Output/Pre-Guidance/' + file + ' > ' + params.output + '/Output/NotGapTrimmed/' + file)
+
+			os.system('Scripts/trimal-trimAl/source/trimal -in ' + params.output + '/Output/NotGapTrimmed/' + file + ' -out ' + params.output + '/Output/Guidance/' + file.split('.')[0] + '.95gapTrimmed.fasta' + ' -gapthreshold 0.05 -fasta')
+
+
+def cl_fasttree(params):
+
+	for file in os.listdir(params.output + '/Output/Guidance'):
+		if file.split('.')[-1] in ('fasta', 'fas', 'faa'):
+			os.system('FastTree ' + params.output + '/Output/Guidance/' + file + ' > ' + params.output + '/Output/Trees/' + file.split('.')[0] + '.FastTree.tre')


 def run(params):
@ -199,58 +315,107 @@ def run(params):
 	seqs_removed = []
 	completed_ogs = []

+	with open('SequencesRemoved_ContaminationLoop.txt', 'w') as o:
+		o.write('Sequence\tLoopRemoved\n')
+
 	for loop in range(params.nloops):
+		seqs_removed_loop = []
 		if params.start == 'raw':
-			seqs_per_og = { file : { rec.id : str(rec.seq) for rec in SeqIO.parse(file, 'fasta') } for file in os.listdir(params.output + '/Output/Pre-Guidance') if file.split('.')[-1] in ('fasta', 'fas', 'faa') }
+			seqs_per_og = { file : { rec.id : str(rec.seq) for rec in SeqIO.parse(params.output + '/Output/Pre-Guidance/' + file, 'fasta') } for file in os.listdir(params.output + '/Output/Pre-Guidance') if file.split('.')[-1] in ('fasta', 'fas', 'faa') }
 		elif params.start in ('unaligned', 'aligned', 'trees'):
-			seqs_per_og = { file : { rec.id : str(rec.seq).replace('-', '') for rec in SeqIO.parse(file, 'fasta') } for file in os.listdir(params.data) if file.split('.')[-1] in ('fasta', 'fas', 'faa') }
+			seqs_per_og = { file : { rec.id : str(rec.seq).replace('-', '') for rec in SeqIO.parse(params.data + '/' + file, 'fasta') } for file in os.listdir(params.data) if file.split('.')[-1] in ('fasta', 'fas', 'faa') }
+
+			if loop == 0:
+				for file in os.listdir(params.data):
+					if file.split('.')[-1] in ('tre', 'tree', 'treefile'):
+						os.system('cp ' + params.data + '/' + file + ' ' + params.output + '/Output/Trees')

 		if loop > 0 or params.start == 'raw':
-			os.system('mv ' + params.output + '/Pre-Guidance ' + params.output + '/Pre-Guidance_' + str(loop))
-		
-		os.mkdir(params.output + '/Pre-Guidance')
+			os.system('mv ' + params.output + '/Output/Pre-Guidance ' + params.output + '/Output/Pre-Guidance_' + str(loop))
+
+			os.mkdir(params.output + '/Output/Pre-Guidance')

 		if params.contamination_loop == 'clade':
-			for tree_file in params.output + '/Trees':
+			for tree_file in os.listdir(params.output + '/Output/Trees'):
 				if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex') and tree_file not in completed_ogs:
-					seqs2keep = get_subtrees(params, params.output + '/Trees/' + tree_file)
+					seqs2keep = get_subtrees(params, params.output + '/Output/Trees/' + tree_file)

-					seqs_removed_from_og = write_new_preguidance(params, seqs2keep, seqs_per_og)
+					seq_file, seqs_removed_from_og = write_new_preguidance(params, seqs2keep, seqs_per_og, tree_file)

 					if len(seqs_removed_from_og) == 0:
 						completed_ogs.append(tree_file)
 					else:
-						seqs_removed += [seq for seq in seqs_per_og[seq_file] if seq not in seqs2keep]
+						seqs_removed_loop += [seq for seq in seqs_per_og[seq_file] if seq not in seqs2keep and seq not in seqs_removed]

 		elif params.contamination_loop == 'seq':
-			contam_per_tax = { line.strip().split('\t')[0] : line.strip().split('\t')[1:] for line in params.sister_rules }

-			if params.contamination_loop == 'clade':
-				for tree_file in params.output + '/Trees':
-					if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex') and tree_file not in completed_ogs:
-						seqs2keep = get_sisters(params, params.output + '/Trees/' + tree_file, contam_per_tax)
+			sister_contam_per_tax = { }
+			if params.sister_rules != None:
+				for line in open(params.sister_rules):
+					if line.strip().split('\t')[0] not in sister_contam_per_tax:
+						sister_contam_per_tax.update({ line.strip().split('\t')[0] : [] })

-						seqs_removed_from_og = write_new_preguidance(params, seqs2keep, seqs_per_og)
+					try:
+						sister_contam_per_tax[line.strip().split('\t')[0]].append((line.strip().split('\t')[1], float(line.strip().split('\t')[2])))
+					except ValueError:
+						sister_contam_per_tax[line.strip().split('\t')[0]].append((line.strip().split('\t')[1], float('inf')))
+					except IndexError:
+						if line.strip() != '':
+							print('\nWarning: the line "' + line.strip() + '" in the sister rules file could not be processed\n')

-						if len(seqs_removed_from_og) == 0:
-							completed_ogs.append(tree_file)
-						else:
-							seqs_removed += [seq for seq in seqs_per_og[seq_file] if seq not in seqs2keep]
+			subsister_contam_per_tax = { }
+			if params.subsister_rules != None:
+				if os.path.isfile(params.subsister_rules):
+					for line in open(params.subsister_rules):
+						if line.strip().split('\t')[0] not in subsister_contam_per_tax and len(line.strip().split('\t')) == 2:
+							subsister_contam_per_tax.update({ line.strip().split('\t')[0] : [] })

-		os.system('mv ' + params.output + '/Trees ' + params.output + '/Trees_' + str(loop))
-		os.mkdir(params.output + '/Trees')
+						subsister_contam_per_tax[line.strip().split('\t')[0]].append(line.strip().split('\t')[1])
+				else:
+					print('\nERROR: It looks like you tried to input a sub-sister rules file to the contamination loop, but the file could not be found.\n')
+					exit()

-		os.system('mv ' + params.output + '/Guidance ' + params.output + '/Guidance_' + str(loop))
-		os.mkdir(params.output + '/Guidance')
+			for tree_file in os.listdir(params.output + '/Output/Trees'):
+				if tree_file.split('.')[-1] in ('tre', 'tree', 'treefile', 'nex') and tree_file not in completed_ogs:
+					seqs2keep = get_sisters(params, params.output + '/Output/Trees/' + tree_file, sister_contam_per_tax, subsister_contam_per_tax)
+
+					seq_file, seqs_removed_from_og = write_new_preguidance(params, seqs2keep, seqs_per_og, tree_file)
+
+					if len(seqs_removed_from_og) == 0:
+						completed_ogs.append(tree_file)
+					else:
+						seqs_removed_loop += [seq for seq in seqs_per_og[seq_file] if seq not in seqs2keep and seq not in seqs_removed]
+		
+		seqs_removed += seqs_removed_loop
+		with open(params.output + '/Output/SequencesRemoved_ContaminationLoop.txt', 'a') as o:
+			for seq in seqs_removed_loop:
+				o.write(seq + '\t' + str(loop) + '\n')
+
+		os.system('mv ' + params.output + '/Output/Trees ' + params.output + '/Output/Trees_' + str(loop))
+		os.mkdir(params.output + '/Output/Trees')
+
+		os.system('mv ' + params.output + '/Output/Guidance ' + params.output + '/Output/Guidance_' + str(loop))
+		os.mkdir(params.output + '/Output/Guidance')
+
+		os.system('mv ' + params.output + '/Output/NotGapTrimmed ' + params.output + '/Output/NotGapTrimmed_' + str(loop))
+		os.mkdir(params.output + '/Output/NotGapTrimmed')

 		params.start = 'unaligned'
 		params.end = 'trees'
+		params.tree_method = params.cl_tree_method

-		guidance.run(params)
-		trees.run(params)
+		if params.cl_alignment_method == 'mafft_only':
+			cl_mafft(params)
+		else:
+			guidance.run(params)

-	with open('SequencesRemoved_ContaminationLoop.txt', 'w') as o:
-		for seq in seqs_removed:
-			o.write(seq + '\n')
+		if params.cl_tree_method == 'fasttree':
+			cl_fasttree(params)
+		else:
+			if 'iqtree' in params.cl_tree_method:
+				os.system('rm -r ' + params.output + '/Output/Intermediate/IQTree/*')
+			elif params.cl_tree_method == 'raxml':
+				os.system('rm -r ' + params.output + '/Output/Intermediate/RAxML/*')

+			trees.run(params)