From 6dc90e0d4c657c3fb1aa7b8a2442af356d42c5ca Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Mon, 12 Jun 2023 13:31:51 -0400 Subject: [PATCH] Add files via upload --- .../Scripts/1a_ContigFiltStats.py | 269 + .../Scripts/1b_XSpeciesContaminationAgnes.py | 153 + PTL1/Transcriptomes/Scripts/2a_remove_rRNA.py | 285 + PTL1/Transcriptomes/Scripts/2b_remove_Bact.py | 410 + .../Scripts/3_CountOGsDiamond.py | 372 + .../Scripts/4_InFrameStopFreq.py | 790 + .../Scripts/5_GCodeTranslate.py | 770 + .../Scripts/6_FilterPartials.py | 652 + .../6b_update_cov_post_removepartials.py | 88 + PTL1/Transcriptomes/Scripts/7_FinalizeName.py | 398 + PTL1/Transcriptomes/Scripts/8_SummaryStats.py | 269 + PTL1/Transcriptomes/Scripts/CUB.py | 523 + PTL1/Transcriptomes/Scripts/PTL1.2831.out | 31859 ++++++++++++++++ .../UnexpexctedShortStuffBlameXyrus.txt | 143 + .../Scripts/__pycache__/CUB.cpython-37.pyc | Bin 0 -> 17514 bytes .../Scripts/__pycache__/CUB.cpython-38.pyc | Bin 0 -> 17804 bytes .../Scripts/__pycache__/CUB.cpython-39.pyc | Bin 0 -> 17634 bytes PTL1/Transcriptomes/Scripts/scratch.py | 7 + PTL1/Transcriptomes/Scripts/wrapper.py | 261 + PTL1/Transcriptomes/Scripts/wrapper_submit.sh | 29 + 20 files changed, 37278 insertions(+) create mode 100644 PTL1/Transcriptomes/Scripts/1a_ContigFiltStats.py create mode 100644 PTL1/Transcriptomes/Scripts/1b_XSpeciesContaminationAgnes.py create mode 100644 PTL1/Transcriptomes/Scripts/2a_remove_rRNA.py create mode 100644 PTL1/Transcriptomes/Scripts/2b_remove_Bact.py create mode 100644 PTL1/Transcriptomes/Scripts/3_CountOGsDiamond.py create mode 100644 PTL1/Transcriptomes/Scripts/4_InFrameStopFreq.py create mode 100644 PTL1/Transcriptomes/Scripts/5_GCodeTranslate.py create mode 100644 PTL1/Transcriptomes/Scripts/6_FilterPartials.py create mode 100644 PTL1/Transcriptomes/Scripts/6b_update_cov_post_removepartials.py create mode 100644 PTL1/Transcriptomes/Scripts/7_FinalizeName.py create mode 
100644 PTL1/Transcriptomes/Scripts/8_SummaryStats.py create mode 100644 PTL1/Transcriptomes/Scripts/CUB.py create mode 100644 PTL1/Transcriptomes/Scripts/PTL1.2831.out create mode 100644 PTL1/Transcriptomes/Scripts/UnexpexctedShortStuffBlameXyrus.txt create mode 100644 PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-37.pyc create mode 100644 PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-38.pyc create mode 100644 PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-39.pyc create mode 100644 PTL1/Transcriptomes/Scripts/scratch.py create mode 100644 PTL1/Transcriptomes/Scripts/wrapper.py create mode 100644 PTL1/Transcriptomes/Scripts/wrapper_submit.sh diff --git a/PTL1/Transcriptomes/Scripts/1a_ContigFiltStats.py b/PTL1/Transcriptomes/Scripts/1a_ContigFiltStats.py new file mode 100644 index 0000000..fc23727 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/1a_ContigFiltStats.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3.6 + +##__Updated__: 01_04_2023 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 1_ContigFiltStats.py +##__Options__: python 1_ContigFiltStats.py --help + +########################################################################################## +## This script is intended to remove small transcripts or small contigs below a given ## +## minimum size from a transcriptome assembly. ## +## ## +## Prior to running this script, ensure the following: ## +## 1. 
##    You have assembled your transcriptome and COPIED the 'assembly' file             ##
##       (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder                ##
##                                                                                      ##
##                               COMMAND Example Below                                  ##
##                                                                                      ##
##                E-mail Xyrus (author) for help if needed: maurerax@gmail.com          ##
##                                                                                      ##
##                                Next Script(s) to Run:                                ##
##  AutoBactVsEuk.py (removes SSU then Bact) or 2a_removeSSU.py then 2b_removeBact.py   ##
##                                                                                      ##
##########################################################################################


import argparse, os, sys
from argparse import RawTextHelpFormatter,SUPPRESS
from Bio import SeqIO

try:
    from Bio.SeqUtils import GC
except ImportError:
    # Newer Biopython releases dropped GC() in favour of gc_fraction(); keep the
    # old name (percentage, G/C/S over the full length) for the rest of the script.
    from Bio.SeqUtils import gc_fraction

    def GC(seq):
        """Return the GC content of *seq* as a percentage (0-100)."""
        return 100 * gc_fraction(seq, ambiguous="ignore")


#----------------------------- Colors For Print Statements ------------------------------#

class color:
    """ANSI escape sequences used to colour terminal messages."""
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    ORANGE = '\033[38;5;214m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'


#------------------------------- Main Functions of Script --------------------------------#

###########################################################################################
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
###########################################################################################

def check_args():
    """Build the argument parser, parse the command line and validate it.

    With no arguments the description is printed and the script exits. The
    parsed namespace is handed to ``return_more_info``; a non-zero result
    from it also ends the script.

    Returns:
        The ``argparse.Namespace`` holding input_file, output_file, minLen,
        maxLen, spades, genbank and author.
    """
    parser = argparse.ArgumentParser(
        description=(
            color.BOLD+'\nThis script will remove Contigs (and provide a summary of statistics)'
            +'\nfrom your Assembly that are shorter than a given length.'+color.ORANGE
            +'\n\nA good minimum length to start with is 200bp.'+color.END+color.BOLD
            +'\n\nThe minimum length value should be adjusted for your data sets.\n'+color.END+usage_msg()),
        usage=SUPPRESS, formatter_class=RawTextHelpFormatter)

    required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

    required_arg_group.add_argument('--input_file','-in', action='store',
        help=color.BOLD+color.GREEN+" Fasta file of Protein/Nucleotide sequences\n"+color.END)

    required_arg_group.add_argument('--output_file','-out',
        help=color.BOLD+color.GREEN+" Desired Output Name\n\n"+color.END)

    required_arg_group.add_argument('--minLen','-min', default=200, type=int,
        help=color.BOLD+color.GREEN+" Minimum number of base pairs for contigs\n (default = 200)"+color.END)
    # The help text used to say "Minimum" here as well, although the value is the upper bound.
    required_arg_group.add_argument('--maxLen','-max', default=15000, type=int,
        help=color.BOLD+color.GREEN+" Maximum number of base pairs for contigs\n (default = 15000)"+color.END)

    optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)

    optional_arg_group.add_argument('--spades','-spades', action='store_true',
        help=color.BOLD+color.GREEN+'rnaSPAdes transcriptome assembly\n'+color.END)

    optional_arg_group.add_argument('--genbank','-gb', action='store_true',
        help=color.BOLD+color.GREEN+'Assembly from Genbank\n (Will include Accession Number in'
        ' contig name)\n'+color.END)

    optional_arg_group.add_argument('-author', action='store_true',
        help=color.BOLD+color.GREEN+' Print author contact information\n'+color.END)

    if len(sys.argv[1:]) == 0:
        print (parser.description)
        print ('\n')
        sys.exit()

    # Parsed once; the original parsed the same command line a second time for no effect.
    args = parser.parse_args()

    quit_eval = return_more_info(args)
    if quit_eval > 0:
        sys.exit()

    return args


###########################################################################################
###------------------------------- Script Usage Message --------------------------------###
###########################################################################################

def usage_msg():
    """Return the coloured example command line appended to the description."""
    return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 1_ContigFiltStats.py'
        ' --input_file ../Op_me_Xxma_rnaSPAdes_scaffolds_15_05.fasta --output_file '
        'Op_me_Xxma --minLen 200 --spades'+color.END)

+########################################################################################## +###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------### +########################################################################################## + +def return_more_info(args): + + valid_arg = 0 + + author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\ + ' maurerax@gmail.com\n\n'+color.END) + + if args.author == True: + print (author) + valid_arg += 1 + + if args.input_file != None: + if os.path.isfile(args.input_file) != False: + if args.input_file.split('/')[-1] not in os.listdir('/'.join(args.input_file.split('/')[:-1])): + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + else: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + + if args.output_file == None: + valid_arg += 1 + + return valid_arg + +########################################################################################### +###--------------------------- Does the Inital Folder Prep -----------------------------### +########################################################################################### + +def prep_folders(args): + Home_folder_name = args.output_file + + if os.path.isdir(args.output_file) != True: + os.system('mkdir ' + args.output_file) + + if os.path.isdir(args.output_file + '/OriginalFasta/') != True: + os.system('mkdir ' + args.output_file +'/OriginalFasta/') + + if os.path.isdir(args.output_file + '/SizeFiltered/') != True: + os.system('mkdir ' + 
args.output_file +'/SizeFiltered/') + + if os.path.isdir('/'.join(args.output_file.split('/')[:-1]) + '/XlaneBleeding/') != True: + os.system('mkdir ' + '/'.join(args.output_file.split('/')[:-1]) + '/XlaneBleeding/') + + +########################################################################################### +###---------- Renames the Contigs, Writes them out, and Calculates Basic Info ----------### +########################################################################################### + +def rename_Transcriptome(args): + + home_folder = args.output_file + '/SizeFiltered/' + + print (color.BOLD+'\n\nPrepping '+color.GREEN+args.input_file.split('/')[-1]+color.END) + + inFasta = [i for i in SeqIO.parse(args.input_file,'fasta') if len(i.seq) >= args.minLen and len(i.seq) <= args.maxLen] + inFasta.sort(key=lambda seq_rec: -len(seq_rec.seq)) + + renamed_seqs = [] + seq_code_dict = {} + + count = 1 + + seq_name_start = 'Contig' + + if args.genbank == True: + for seq_rec in inFasta: + seq_code_dict.setdefault(seq_rec.id,[]).append(seq_rec.id.split('_')[-1].split('.')[0]+'_Contig_'+str(count)+'_Len'+str(len(seq_rec.seq))) + seq_code_dict.setdefault(seq_rec.id,[]).append(str(seq_rec.seq).upper()) + renamed_seqs.append('>'+seq_rec.id.split('_')[-1].split('.')[0]+'_Contig_'+str(count)+'_Len'+str(len(seq_rec.seq))+'\n'+str(seq_rec.seq).upper()) + count += 1 + elif args.spades == True: + for seq_rec in inFasta: + seq_code_dict.setdefault(seq_rec.description,[]).append(seq_name_start+'_'+str(count)+'_Len'+str(len(seq_rec.seq))+'_Cov'+str(int(round(float(seq_rec.description.split('_')[-3]))))) + seq_code_dict.setdefault(seq_rec.description,[]).append(seq_rec.description.split('_')[5]) + seq_code_dict.setdefault(seq_rec.description,[]).append(str(seq_rec.seq).upper()) + renamed_seqs.append('>'+seq_name_start+'_'+str(count)+'_Len'+str(len(seq_rec.seq))+'_Cov'+str(int(round(float(seq_rec.description.split('_')[-3]))))+'\n'+str(seq_rec.seq).upper()) + count += 1 + else: + 
for seq_rec in inFasta: + seq_code_dict.setdefault(seq_rec.description,[]).append(seq_name_start+'_'+str(count)+'_Len'+str(len(seq_rec.seq))) + seq_code_dict.setdefault(seq_rec.description,[]).append(str(seq_rec.seq).upper()) + renamed_seqs.append('>'+seq_name_start+'_'+str(count)+'_Len'+str(len(seq_rec.seq))+'\n'+str(seq_rec.seq).upper()) + count += 1 + + print (color.BOLD+'\n\nThere are '+color.RED+str(len(renamed_seqs))+' contigs > '+str(args.minLen)\ + +color.END+color.BOLD+' in '+color.DARKCYAN+args.input_file.split('/')[-1]+color.END) + + with open(home_folder + args.output_file.split('/')[-1] + '.' + str(args.minLen)+'bp.fasta','w+') as w: + for seq in renamed_seqs: + w.write(seq+'\n') + + if args.spades != True: + with open(home_folder + args.output_file.split('/')[-1] + '.' + str(args.minLen) + 'bp.SeqCodes.tsv','w+') as w: + w.write('Original Name\tNew Name\tSeq Length\t Seq GC\n') + for k, v in seq_code_dict.items(): + w.write(k+'\t'+v[0]+'\t'+str(len(v[1]))+'\t'+str(GC(v[1]))+'\n') + else: + with open(home_folder + args.output_file.split('/')[-1] + '.' + str(args.minLen) + 'bp.SeqCodes.tsv','w+') as w: + w.write('Original Name\tNew Name\tSeq Length\tSeq GC\tSeq Coverage\n') + for k, v in seq_code_dict.items(): + w.write(k+'\t'+v[0]+'\t'+str(len(v[2]))+'\t'+str(GC(v[2]))+'\t'+str(v[1])+'\n') + + +########################################################################################### +###-------------------------- Cleans Up the PostAssembly Folder ------------------------### +########################################################################################### + +def clean_up(args): + + os.system('cp ' + args.input_file + ' ' + args.output_file + '/OriginalFasta/' + args.input_file.split('/')[-1].replace('.fasta', '.Original.fasta')) + + os.system('cp ' + args.output_file + '/SizeFiltered/' + args.output_file.split('/')[-1] + '.' 
+ str(args.minLen)+'bp.fasta ' + '/'.join(args.output_file.split('/')[:-1]) + '/XlaneBleeding/') + + +########################################################################################### +###-------------------------------- Next Script Message --------------------------------### +########################################################################################### + +def next_script(args): + + print (color.BOLD+'\n\nLook for '+color.DARKCYAN+args.output_file+'.'+str(args.minLen)+\ + 'bp.fasta'+color.END+color.BOLD+'\n\n') + + print ('Next Script is: '+color.GREEN+'2_Auto_rRNA_BvE.py'+color.END\ + +color.BOLD+'\n(Alternatively'+color.GREEN+' 2a_remove_rRNA.py followed by 2b_remove_Bact.py'\ + +color.END+color.BOLD+')\n\n'+ color.END) + + +########################################################################################## +###--------------- Checks Command Line Arguments and Calls on Functions ---------------### +########################################################################################## + +def main(): + + args = check_args() + + prep_folders(args) + + temp = rename_Transcriptome(args) + + clean_up(args) + + next_script(args) + +main() + diff --git a/PTL1/Transcriptomes/Scripts/1b_XSpeciesContaminationAgnes.py b/PTL1/Transcriptomes/Scripts/1b_XSpeciesContaminationAgnes.py new file mode 100644 index 0000000..bf697ed --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/1b_XSpeciesContaminationAgnes.py @@ -0,0 +1,153 @@ +#!/usr/bin/python3 + +__author__ = 'Jean-David Grattepanche' +__version__ = 'ACL fixed sequence naming issue Feb 23, 2022' +__email__ = 'jeandavid.grattepanche@gmail.com' + + + +import sys +import os +import re +import time +import string +import os.path +from Bio import SeqIO +from sys import argv +listtaxa=[] +toosim = 0.99 +seqcoverage = 0.7 + +def merge_files(folder, minlen, conspecific_names): + mergefile = open('/'.join(folder.split('/')[:-1]) + '/forclustering.fasta','w+') + print("MERGE following files") + for 
taxafile in os.listdir(folder): + if taxafile[0] != ".": + listtaxa.append(taxafile.split('.' + str(minlen) + 'bp')[0]) + + for line2 in SeqIO.parse(folder+'/'+taxafile, 'fasta'): + if int(len(str(line2.seq))) >= int(minlen): + mergefile.write('>'+taxafile.split('.' + str(minlen) + 'bp')[0] + '_' + line2.description + '\n' + str(line2.seq) + '\n') + else: + print(line2, " is too short") + mergefile.close() + + sort_cluster(folder, listtaxa, minlen, conspecific_names) + + +def sort_cluster(folder, listtaxa, minlen, conspecific_names): + if not os.path.exists('/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/'): + os.makedirs('/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/') + + fastalist = []; fastadict= {} + conspecific_names_dict = { line.split('\t')[0] : line.split('\t')[1].strip() for line in open(conspecific_names) } + + print('CREATE a dictionnary of sequences') + for record in SeqIO.parse(open('/'.join(folder.split('/')[:-1]) + '/forclustering.fasta','r'),'fasta'): + if record.id[:10] not in conspecific_names_dict: + print('\nError in cross-plate contamination assessment: the ten-digit code ' + record.id[:10] + ' is not found in the conspecific names file. 
Please check that this file is correct and try again.\n') + quit() + + IDL = record.description, int(record.description.split('_Cov')[1].replace('\n','')) + fastalist.append(IDL) + fastadict[record.description] = record.seq + + print("CLUSTER sequences that overlap at least 70%") + os.system('vsearch --cluster_fast ' + '/'.join(folder.split('/')[:-1]) + '/forclustering.fasta --strand both --query_cov '+str(seqcoverage)+' --id '+str(toosim) +' --uc ' + '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/results_forclustering.uc --threads 60' ) + + #input2 = open('/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/results_forclustering.uc','r') + #input2 = open('/Output_PostClusterBackup/clusteringresults_vsearch/results_forclustering.uc','r') + cluster_output = '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/results_forclustering.uc' + out2 = open('/'.join(folder.split('/')[:-1]) + '/fastatokeep.fas','w+') + out3 = open('/'.join(folder.split('/')[:-1]) + '/fastatoremoved.fas','w+') + out4 = open('/'.join(folder.split('/')[:-1]) + '/fastatoremoved.uc','w+') + print("CREATE a dictionary with clustering results") + clustdict= {}; clustlist = []; allseq = []; clustline = {}; list= []; i=0; j=0 + for row2 in open(cluster_output, 'r'): + if row2.split('\t')[0] == 'C' and int(row2.split('\t')[2]) < 2: # keep all unique sequences + out2.write('>'+row2.split('\t')[8] + '\n' + str(fastadict[row2.split('\t')[8]])+ '\n') + if row2.split('\t')[0] == 'C' and int(row2.split('\t')[2]) > 1: # create another dictionary +# print("create dico: ", row2.split('\t')[8]) + clustdict.setdefault(row2.split('\t')[8], [row2.split('\t')[8]]) + clustlist.append(row2.split('\t')[8]) + + for row3 in open(cluster_output, 'r'): + if row3.split('\t')[0] == 'H': +# print("add dico: ", row3.split('\t')[9], row3.split('\t')[8]) + clustdict[row3.split('\t')[9].replace('\n','')].append(row3.split('\t')[8].replace('\n','')) + 
clustline[row3.split('\t')[8].replace('\n','')] = row3.replace('\n','') + clustline[row3.split('\t')[9].replace('\n','')] = row3.replace('\n','') + + print("PARSE the clusters: keep seed sequences (highest coverage) for each cluster") + for clust in clustlist: + list = sorted(clustdict[clust], reverse = True, key=lambda x: int(x.split('_Cov')[1])) + master = list[0] + Covmaster = int(list[0].split('_Cov')[1]) + master8dig = ('_').join(list[0].split('_')[0:3])[:-2] + for seq in list: + clustered = seq.replace('\n','') + Covclustered = int(clustered.split('_Cov')[1]) + clustered8dig = ('_').join(clustered.split('_')[0:3])[:-2] +# print(master8dig, Covmaster, '//', clustered8dig, Covclustered) + if float(Covmaster/Covclustered) < 10: + out2.write('>'+clustered + '\n' + str(fastadict[clustered])+ '\n') + i +=1 + elif conspecific_names_dict[master[:10]] == conspecific_names_dict[clustered[:10]]: + out2.write('>'+clustered + '\n' + str(fastadict[clustered])+ '\n') + i +=1 + elif Covclustered >= 50: + out2.write('>'+clustered + '\n' + str(fastadict[clustered])+ '\n') + i +=1 + else: + j +=1 + out4 = open('/'.join(folder.split('/')[:-1]) + '/fastatoremoved.uc','a') + out3.write('>'+clustered + '\n' + str(fastadict[clustered])+ '\n') + print(clustline[clustered],'\t' , master ) + out4.write(clustline[clustered]+ '\t' + master + '\n') + out4.close() + + + print('there are ', str(i),' sequences kept and ',str(j),' sequences removed') + + out2.close() + out3.close() + + splittaxa(folder, listtaxa, minlen) + +def splittaxa(folder, listtaxa, minlen): + for taxa in listtaxa: + tax_sf_path = '/'.join(folder.split('/')[:-1]) + '/' + taxa + '/SizeFiltered/' + os.system('mv ' + tax_sf_path + taxa + '.' + str(minlen) + 'bp.fasta' + ' ' + tax_sf_path + taxa + '.' + str(minlen) + 'bp.preXPlate.fasta') + + with open(tax_sf_path + taxa + '.' 
+ str(minlen) + 'bp.fasta','w') as o: + for kept in SeqIO.parse('/'.join(folder.split('/')[:-1]) + '/fastatokeep.fas','fasta'): + if taxa in kept.description: + o.write('>' + kept.description.replace(taxa + '_', '') + '\n' + str(kept.seq) + '\n') + + os.system('mv ' + '/'.join(folder.split('/')[:-1]) + '/fastatokeep.fas ' + '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/') + os.system('mv ' + '/'.join(folder.split('/')[:-1]) + '/fastatoremoved.fas ' + '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/') + os.system('mv ' + '/'.join(folder.split('/')[:-1]) + '/fastatoremoved.uc ' + '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/') + os.system('mv ' + '/'.join(folder.split('/')[:-1]) + '/forclustering.fasta ' + '/'.join(folder.split('/')[:-1]) + '/clusteringresults_vsearch/') + +def main(): + + script, folder, minlen, conspecific_names = argv + merge_files(folder, minlen, conspecific_names) + +main() + + + + + + + + + + + + + + + + diff --git a/PTL1/Transcriptomes/Scripts/2a_remove_rRNA.py b/PTL1/Transcriptomes/Scripts/2a_remove_rRNA.py new file mode 100644 index 0000000..d9c7719 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/2a_remove_rRNA.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3.5 + +##__Updated__: 18_08_2017 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 2a_remove_rDNA.py --help + +########################################################################################## +## This script is intended to identify and isolate SSU/LSU sequences ## +## Prior to running this script, ensure the following: ## +## ## +## 1. You have assembled your transcriptome and COPIED the 'assembly' file ## +## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ## +## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ## +## 3. Have the Databases set up correctly (e.g. with BLAST or Diamond) and in their ## +## respective folders! 
##    See the manual if you need help                                                   ##
##                                                                                      ##
##                               COMMAND Example Below                                  ##
##                                                                                      ##
##                E-mail Xyrus (author) for help if needed: maurerax@gmail.com          ##
##                                                                                      ##
##                                Next Script(s) to Run:                                ##
##                                   2b_removeBact.py                                   ##
##                                                                                      ##
##########################################################################################


import argparse, os, sys
import re
import subprocess
from argparse import RawTextHelpFormatter,SUPPRESS
from Bio import SeqIO


#------------------------------ Colors For Print Statements ------------------------------#

class color:
    """ANSI escape sequences used to colour terminal messages."""
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    ORANGE = '\033[38;5;214m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'


#------------------------------- Main Functions of Script --------------------------------#

###########################################################################################
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
###########################################################################################

def check_args():
    """Build the argument parser, parse the command line and validate it.

    With no arguments the description is printed and the script exits; it
    also exits when ``return_more_info`` reports a problem.

    Returns:
        The ``argparse.Namespace`` holding input_file, databases, threads and author.
    """
    parser = argparse.ArgumentParser(
        description=(
            color.BOLD+'\nThis script will remove '+color.RED+'rDNA contigs (both SSU and LSU)'+color.END
            +color.BOLD+'\nfrom your Assembly using a set of '+color.RED+'SSU/LSU rDNAs '+color.END
            +color.BOLD+'from diverse\n'+color.ORANGE+'Eukaryotes, Bacteria and Archaea'+color.END
            +color.BOLD+'.'+color.END+usage_msg()),
        usage=SUPPRESS, formatter_class=RawTextHelpFormatter)

    required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

    required_arg_group.add_argument('--input_file','-in', action='store',
        help=color.BOLD+color.GREEN+"Fasta file of Nucleotide sequences"+color.END)
    required_arg_group.add_argument('--databases','-d', action='store',
        help=color.BOLD+color.GREEN+"Path to databases"+color.END)

    optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
    optional_arg_group.add_argument('--threads','-t', default='2',
        help=color.BOLD+color.GREEN+' Number of threads to use for BLAST\n (default = 2)\n'+color.END)
    optional_arg_group.add_argument('-author', action='store_true',
        help=color.BOLD+color.GREEN+' Print author contact information\n'+color.END)

    if len(sys.argv[1:]) == 0:
        print (parser.description)
        print ('\n')
        sys.exit()

    args = parser.parse_args()

    quit_eval = return_more_info(args)
    if quit_eval > 0:
        sys.exit()

    return args


###########################################################################################
###------------------------------- Script Usage Message --------------------------------###
###########################################################################################

def usage_msg():
    """Return the coloured example command line appended to the description."""
    return color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 2a_remove_rRNA.py --input_file ../Op_me_Xxma_rna.200bp.fasta'+color.END


##########################################################################################
###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
##########################################################################################

def return_more_info(args):
    """Validate the parsed arguments and print any flagged information.

    Returns:
        Number of reasons the script should stop (0 means carry on).
    """
    valid_arg = 0

    author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'
        ' maurerax@gmail.com\n\n'+color.END)

    if args.author:
        print (author)
        valid_arg += 1

    if args.input_file is not None:
        # isfile() is sufficient; the extra os.listdir() check crashed on paths
        # without a directory component (os.listdir('')).
        if not os.path.isfile(args.input_file):
            print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '
                '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'
                ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)
            valid_arg += 1
        elif not args.input_file.endswith('bp.fasta'):
            print (color.BOLD + '\n\nCheck that you are giving an appropriately Named/Processed '
                'Fasta file(s) to this script\n\nNOTE that this script CURRENTLY expects your'
                ' Fasta files to contain '+color.RED+ '"rna"'+color.END+color.BOLD+' in \nthe Fasta File'
                ' Name and must end with ' + color.RED + '"bp.fasta"\n\n' + color.END)
            valid_arg += 1

    if args.databases is None:
        # Concatenating None with a path below raised TypeError when --databases was omitted.
        if not args.author:
            print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' No path to the '
                +color.ORANGE+'Databases Folder'+color.END+color.BOLD+' was provided (--databases).\n\n'+color.END)
        valid_arg += 1
    elif not os.path.isdir(args.databases + '/db_BvsE'):
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '
            +color.ORANGE+'db_BvsE Folder!\n\n'+color.END+color.BOLD+'Ensure that this folder '
            'can be found in the main '+color.ORANGE+'Databases Folder'+color.END+color.BOLD
            +'\n\nThen try once again.'+color.END)
        valid_arg += 1
    elif not os.path.isfile(args.databases + '/db_BvsE/SSULSUdb.nhr'):
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '
            'BLAST+ formatted '+color.ORANGE+'SSU-LSU databases!\n\n'+color.END+color.BOLD+
            'Ensure that they can be found in the '+color.ORANGE+'db_BvsE folder'+color.END+
            color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+
            color.END+color.BOLD+'\n\nThen try once again.'+color.END)
        valid_arg += 1

    return valid_arg


def _output_prefix(args):
    """Return the input file name without its '.<N>bp.fasta' suffix."""
    # Generalises the old split('.200bp.fasta'), which only worked for minLen 200.
    return re.sub(r'\.\d+bp\.fasta$', '', args.input_file.split('/')[-1])


###########################################################################################
###--------------------------- Does the Initial Folder Prep ----------------------------###
###########################################################################################

def prep_folders(args):
    """Create the rRNA_Removal folder and return (ten-character code, folder path)."""
    code = args.input_file.split('/')[-1][:10]
    # NOTE(review): assumes the input path contains a 'SizeFiltered' component.
    rRNA_folder = args.input_file.split('SizeFiltered')[0] + '/rRNA_Removal/'

    os.makedirs(rRNA_folder, exist_ok=True)

    return code, rRNA_folder


###########################################################################################
###---------------------- Uses BLAST to identify SSU/LSU Sequences ---------------------###
###########################################################################################

def remove_rDNA(args, rRNA_folder):
    """BLAST the contigs against the SSU/LSU database and split them into two files.

    Contigs with a hit go to '<prefix>_rRNAseqs.fasta', all others to
    '<prefix>_NorRNAseqs.fasta', both inside *rRNA_folder*.

    Returns:
        Tuple of strings: (number of rRNA contigs, number of remaining contigs).
    """
    prefix = _output_prefix(args)
    blast_output = rRNA_folder + prefix + '_allSSULSUresults.tsv'

    # --threads was parsed but never used; fall back to the old fixed value of 2.
    threads = str(getattr(args, 'threads', None) or 2)

    print (color.BOLD+'\n\nBLASTing '+color.DARKCYAN+args.input_file.split('/')[-1]+color.END
        +color.BOLD+ ' against the rDNA database\n\n' + color.END)

    # Argument list (no shell); check=True stops the run if blastn fails.
    subprocess.run(['blastn', '-query', args.input_file, '-evalue', '1e-10',
                    '-max_target_seqs', '1', '-outfmt', '6',
                    '-db', args.databases + '/db_BvsE/SSULSUdb',
                    '-num_threads', threads, '-out', blast_output], check=True)

    with open(blast_output) as hits:
        rDNA_Hits = {line.split('\t')[0] for line in hits}  # set: O(1) membership tests

    print (color.BOLD+'Binning Sequences from '+color.DARKCYAN+args.input_file.split('/')[-1]
        +color.END+color.BOLD+'\nas rDNA OR Potentially Protein-Coding\n\n'+color.END)

    no_SSULSU = 0
    with_SSULSU = 0

    with open(rRNA_folder + prefix + '_rRNAseqs.fasta','w') as HasSSU, \
         open(rRNA_folder + prefix + '_NorRNAseqs.fasta','w') as NoSSU:
        for seq_rec in SeqIO.parse(args.input_file,'fasta'):
            # The first column of the hits table is compared with the record id as
            # well as the full description, so headers with spaces can match.
            if seq_rec.id in rDNA_Hits or seq_rec.description in rDNA_Hits:
                HasSSU.write('>'+seq_rec.description+'\n'+str(seq_rec.seq)+'\n')
                with_SSULSU += 1
            else:
                NoSSU.write('>'+seq_rec.description+'\n'+str(seq_rec.seq)+'\n')
                no_SSULSU += 1

    return str(with_SSULSU), str(no_SSULSU)


###########################################################################################
###--------------------------- Updates Log of SSU/LSU Removal --------------------------###
###########################################################################################

def update_log(args, with_SSU, no_SSU):
    """Print the contig counts and append them to the PostAssembly log file."""
    os.makedirs('../PostAssembly_Logs/', exist_ok=True)

    # File name taken from the last path component; split('/')[1] raised
    # IndexError for bare names and picked a folder for deeper paths.
    input_name = args.input_file.split('/')[-1]

    print (color.BOLD+'There are '+color.RED+with_SSU+' rRNA contigs'+color.END+color.BOLD
        +' and '+color.PURPLE+no_SSU+' Putative Protein-coding contigs'+color.END+color.BOLD
        +'\nin '+color.DARKCYAN+input_name+'\n' + color.END)

    with open('../PostAssembly_Logs/'+input_name.split('.fas')[0]+'.Log.txt','a') as LogFile:
        LogFile.write('rDNA Contigs\t'+with_SSU+'\tn/a\tn/a\n')
        LogFile.write('Non-rDNA Contigs\t'+no_SSU+'\tn/a\tn/a\n')


###########################################################################################
###-------------------------------- Next Script Message --------------------------------###
###########################################################################################

def next_script(args):
    """Tell the user where the rRNA-free Fasta is and which script comes next."""
    # Names are derived the same way remove_rDNA builds them; the old
    # split('/')[1] crashed on paths without a '/'.
    print (color.BOLD+'\nLook for '+color.ORANGE+_output_prefix(args)
        + '_NorRNAseqs.fasta'+color.END+color.BOLD+'\nin the '+args.input_file.split('SizeFiltered')[0]
        +' Folder\n\n' + color.END)
    print (color.BOLD + 'Next Script is: ' + color.GREEN + '2b_remove_Bact.py\n\n'+ color.END)

+########################################################################################### +###-------------------------- Cleans Up the PostAssembly Folder ------------------------### +########################################################################################### + +def clean_up(args): + + home_folder = args.input_file.split('SizeFiltered')[0] + + os.system('cp ' + home_folder + 'rRNA_Removal/*NorRNA*.fasta ' + home_folder) + +########################################################################################## +###--------------- Checks Command Line Arguments and Calls on Functions ---------------### +########################################################################################## + +def main(): + + args = check_args() + + code, rRNA_folder = prep_folders(args) + + with_SSULSU, no_SSULSU = remove_rDNA(args, rRNA_folder) + + #update_log(args, with_SSULSU, no_SSULSU) + + clean_up(args) + + next_script(args) + +main() + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/PTL1/Transcriptomes/Scripts/2b_remove_Bact.py b/PTL1/Transcriptomes/Scripts/2b_remove_Bact.py new file mode 100644 index 0000000..a67ee47 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/2b_remove_Bact.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3.5 + +##__Updated__: 18_08_2017 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 2b_remove_Bact.py --help + +########################################################################################## +## This script is intended to identify and isolate SSU/LSU sequences ## +## Prior to running this script, ensure the following: ## +## ## +## 1. You have assembled your transcriptome and COPIED the 'assembly' file ## +## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ## +## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ## +## 3. Have the Databases set up correctly (e.g. with BLAST or Diamond) and in their ## +## respective folders! 
See the manual if you need help ## +## 4. Run removeSSU.py on your Fasta file ## +## ## +## COMMAND Example Below ## +## ## +## E-mail Xyrus (author) for help if needed: maurerax@gmail.com ## +## ## +## Next Script(s) to Run: ## +## 3_CountOGsDiamond.py ## +## ## +########################################################################################## + + +import argparse, os, sys +from argparse import RawTextHelpFormatter,SUPPRESS +from distutils import spawn +from Bio import SeqIO + + +#------------------------------ Colors For Print Statements ------------------------------# +class color: + PURPLE = '\033[95m' + CYAN = '\033[96m' + DARKCYAN = '\033[36m' + ORANGE = '\033[38;5;214m' + BLUE = '\033[94m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + END = '\033[0m' + + +#------------------------------- Main Functions of Script --------------------------------# + +########################################################################################### +###---------------------------- UPDATE DIAMOND PATH BELOW! -----------------------------### +########################################################################################### + ## IF Diamond is IN YOUR PATH then no updating is needed... + +def check_diamond_path(): + + diamond_path = '' + + if diamond_path == '': + diamond_path = spawn.find_executable("diamond") + #diamond_path = '/path/to/diamond' + else: + pass + + if diamond_path == None: + print (color.BOLD + '\n\nPlease open this script and check that you have included'\ + +' the PATH to the'+color.BLUE+' "Diamond" '+color.END+color.BOLD+'executable.\n\n'+color.END) + print (color.BOLD+color.BLUE+'LOOK FOR:\n\n'+color.RED\ + +'#------------------------------ UPDATE DIAMOND PATH BELOW! 
-------------------------------#'\ + +color.BLUE+'\n\nThis is somewhere around lines 50 - 80...\n\n'+color.END) + + sys.exit() + else: + pass + + return diamond_path + + +########################################################################################### +###--------------------- Parses and Checks Command-Line Arguments ----------------------### +########################################################################################### + +def check_args(): + + parser = argparse.ArgumentParser(description= + color.BOLD + '\nThis script will categorize Contigs as'+color.ORANGE+' STRONGLY '+color.END\ + +color.BOLD+color.RED+'Eukaryotic \nOR Prokaryotic'+color.END+color.BOLD+' using a set of Proteins'\ + ' from diverse\n'+color.ORANGE+'Eukaryotes, Bacteria and Archaea'+color.END\ + +color.BOLD+'.'+color.END+usage_msg(), usage=SUPPRESS,formatter_class=RawTextHelpFormatter) + + required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END) + + required_arg_group.add_argument('--input_file','-in', action='store', + help=color.BOLD+color.GREEN+'Fasta file of Nucleotide sequences (with rRNAs removed)'+color.END) + required_arg_group.add_argument('--databases','-d', action='store', + help=color.BOLD+color.GREEN+"Path to databases"+color.END) + + optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END) + optional_arg_group.add_argument('-author', action='store_true', + help=color.BOLD+color.GREEN+' Print author contact information\n'+color.END) + + if len(sys.argv[1:]) == 0: + print (parser.description) + print ('\n') + sys.exit() + + args = parser.parse_args() + + quit_eval = return_more_info(args) + if quit_eval > 0: + sys.exit() + + return args + + +########################################################################################### +###------------------------------- Script Usage Message --------------------------------### 
+########################################################################################### + +def usage_msg(): + return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 2b_remove_Bact.py --input_file'\ + ' ../Op_me_Xxma/Op_me_Xxma_NorRNAseqs.fasta'+color.END) + + +########################################################################################## +###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------### +########################################################################################## + +def return_more_info(args): + + valid_arg = 0 + + author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\ + ' maurerax@gmail.com\n\n'+color.END) + + if args.author == True: + print (author) + valid_arg += 1 + + print(args.input_file) + + if args.input_file != None: + if os.path.isfile(args.input_file) != False: + if args.input_file.split('/')[-1] not in os.listdir('/'.join(args.input_file.split('/')[:-1])): + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + elif args.input_file.endswith('NorRNAseqs.fasta') != True: + print (color.BOLD+'\n\nInvalid Fasta File! 
Only Fasta Files that were processed'\ + ' with '+color.GREEN+'2a_remove_rRNA.py '+color.END+color.BOLD+'are valid\n\n'\ + 'However, to bypass that issue, Fasta Files MUST end with '+color.CYAN+\ + '"NorRNAseqs.fas"\n\n'+color.END) + valid_arg += 1 + else: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + + if os.path.isdir(args.databases + '/db_BvsE') != True: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\ + +color.ORANGE+'db_BvsE Folder!\n\n'+color.END+color.BOLD+'Ensure that this folder '\ + 'can be found in the main '+color.ORANGE+'Databases Folder'+color.END+color.BOLD\ + +'\n\nThen try once again.') + valid_arg += 1 + elif os.path.isfile(args.databases + '/db_BvsE/eukout.dmnd') != True: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\ + 'Diamond formatted '+color.ORANGE+'Eukaryotic Protein database!\n\n'+color.END+color.BOLD+\ + 'Ensure that it can be found in the '+color.ORANGE+'db_BvsE folder'+color.END+\ + color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+\ + color.END+color.BOLD+'\n\nThen try once again.'+color.END) + elif os.path.isfile(args.databases + '/db_BvsE/micout.dmnd') != True: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\ + 'Diamond formatted '+color.ORANGE+'Bacterial/Archaeal Protein database!\n\n'+color.END+color.BOLD+\ + 'Ensure that it can be found in the '+color.ORANGE+'db_BvsE folder'+color.END+\ + color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+\ + color.END+color.BOLD+'\n\nThen try once again.'+color.END) + + valid_arg += 1 + + return valid_arg + + +########################################################################################### 
###--------------------------- Does the Initial Folder Prep ----------------------------###
###########################################################################################

def _home_folder(args):
    """Return the directory that holds ``args.input_file`` ('.' for a bare file name)."""
    return os.path.dirname(args.input_file) or '.'


def _bve_folder(args):
    """Return the path of the ``BvE`` working folder, including a trailing separator."""
    return os.path.join(_home_folder(args), 'BvE', '')


def prep_folders(args):
    """Create the ``BvE`` folder next to the input FASTA file if it does not exist.

    Args:
        args: parsed command-line arguments; only ``args.input_file`` is read.
    """
    # os.makedirs avoids spawning a shell and copes with spaces in the path.
    os.makedirs(_bve_folder(args), exist_ok=True)


###########################################################################################
###---------------- Runs Diamond on Bact and Euk small RefSeq Databases ----------------###
###########################################################################################

def _run_diamond(diamond_path, query, database, output, threads):
    """Run one ``diamond blastx`` search and raise if DIAMOND exits with an error."""
    import subprocess

    command = [
        diamond_path, 'blastx',
        '-q', query,
        '--max-target-seqs', '1',
        '-d', database,
        '--evalue', '1e-5',
        '--threads', str(threads),
        '--outfmt', '6',
        '-o', output,
    ]
    # An argument list (no shell) keeps paths with spaces or shell metacharacters intact;
    # check=True stops the pipeline here instead of failing later on a missing result file.
    subprocess.run(command, check=True)


def ublast_BvE(args, diamond_path, threads=60):
    """Search the input contigs against the prokaryotic and eukaryotic databases.

    Writes ``allmicresults.tsv`` and ``alleukresults.tsv`` (DIAMOND tabular format 6)
    into the ``BvE`` folder next to the input file.

    Args:
        args: parsed command-line arguments (``input_file`` and ``databases`` are read).
        diamond_path: path to the DIAMOND executable.
        threads: number of threads handed to DIAMOND (default 60, the value that was
            previously hard-coded).

    Raises:
        subprocess.CalledProcessError: if DIAMOND returns a non-zero exit status.
    """
    bve_folder = _bve_folder(args)
    db_folder = os.path.join(args.databases, 'db_BvsE')

    print(color.BOLD+'\n\n"BLAST"-ing against PROK database using DIAMOND: ' + color.DARKCYAN + 'micout.dmnd' + color.END + '\n\n')
    _run_diamond(diamond_path, args.input_file, os.path.join(db_folder, 'micout.dmnd'),
                 bve_folder + 'allmicresults.tsv', threads)

    print(color.BOLD+'\n\n"BLAST"-ing against EUK database using DIAMOND: ' + color.DARKCYAN + 'eukout.dmnd' + color.END + '\n\n')
    _run_diamond(diamond_path, args.input_file, os.path.join(db_folder, 'eukout.dmnd'),
                 bve_folder + 'alleukresults.tsv', threads)


###########################################################################################
###---------------- Compares Bacterial and Euk Hits for Classification -----------------###
###########################################################################################

def _best_evalues(tsv_path):
    """Map every query ID of a DIAMOND tabular file to its smallest e-value.

    The e-value is read from the second-to-last column, as in the default
    12-column tabular layout. Blank or truncated lines are ignored.
    """
    best = {}
    with open(tsv_path) as hits:
        for line in hits:
            fields = line.rstrip('\n').split('\t')
            if len(fields) < 2:
                continue
            query, evalue = fields[0], float(fields[-2])
            if query not in best or evalue < best[query]:
                best[query] = evalue
    return best


def _classify_contig(euk_evalue, prok_evalue, euk_fold=100, prok_fold=1000):
    """Label one contig from its best eukaryotic / prokaryotic e-values.

    ``None`` means "no hit". A contig hit in both databases is EUKARYOTIC when the
    prokaryotic e-value is at least ``euk_fold`` times the eukaryotic one, PROKARYOTIC
    when the eukaryotic e-value is at least ``prok_fold`` times the prokaryotic one,
    and UNDETERMINED otherwise.
    """
    if euk_evalue is None and prok_evalue is None:
        return 'UNDETERMINED'
    if prok_evalue is None:
        # Eukaryotic hit only.
        return 'EUKARYOTIC'
    if euk_evalue is None:
        # Prokaryotic hit only.
        return 'PROKARYOTIC'
    if euk_evalue == 0 or prok_evalue == 0:
        # The ratios below are undefined when either e-value is reported as 0.0; these
        # contigs keep their own category, exactly as before.
        # NOTE(review): an e-value of 0 is the strongest possible hit, yet such contigs
        # end up tagged '_U' -- confirm this is the intended treatment.
        return 'divide by zero'
    if prok_evalue / euk_evalue >= euk_fold:
        return 'EUKARYOTIC'
    if euk_evalue / prok_evalue >= prok_fold:
        return 'PROKARYOTIC'
    return 'UNDETERMINED'


def _write_category(path, records, epu, tag):
    """Write ``records`` to ``path`` and, with ``tag`` appended to each name, to ``epu``."""
    with open(path, 'w') as out:
        for rec in records:
            sequence = str(rec.seq)
            out.write('>' + rec.description + '\n' + sequence + '\n')
            epu.write('>' + rec.description + tag + '\n' + sequence + '\n')


def compare_hits(args):
    """Classify every contig as eukaryotic, prokaryotic or undetermined.

    Reads ``alleukresults.tsv`` and ``allmicresults.tsv`` from the ``BvE`` folder,
    writes ``comparisons.txt`` plus one FASTA file per category into that folder, and
    writes the combined ``*WTA_EPU.fasta`` (names suffixed ``_E``/``_P``/``_U``) next
    to the input file. Within each category, sequences are ordered longest first.

    Args:
        args: parsed command-line arguments; only ``args.input_file`` is read.

    Returns:
        A tuple of three strings: the number of EUKARYOTIC, PROKARYOTIC and
        UNDETERMINED contigs (contigs in the 'divide by zero' category are not
        included in the third count).
    """
    bve_folder = _bve_folder(args)
    records = list(SeqIO.parse(args.input_file, 'fasta'))

    euk_best = _best_evalues(bve_folder + 'alleukresults.tsv')
    prok_best = _best_evalues(bve_folder + 'allmicresults.tsv')

    # Keyed by the full description (used for the output names). Hits are looked up by
    # record ID because the query column of the DIAMOND table holds the identifier only,
    # so a header containing whitespace would never match its own description.
    comparisons = {}
    for rec in records:
        euk_evalue = euk_best.get(rec.id)
        prok_evalue = prok_best.get(rec.id)
        comparisons[rec.description] = [
            'no hit' if euk_evalue is None else euk_evalue,
            'no hit' if prok_evalue is None else prok_evalue,
            _classify_contig(euk_evalue, prok_evalue),
        ]

    with open(bve_folder + 'comparisons.txt', 'w') as table:
        for name, values in comparisons.items():
            table.write(name + ':' + ':'.join(str(value) for value in values) + '\n')

    ### Gathers the sequences and categorizes them (longest first; the sort is stable,
    ### so input order is kept among sequences of equal length)
    by_category = {'EUKARYOTIC': [], 'PROKARYOTIC': [], 'UNDETERMINED': [], 'divide by zero': []}
    for rec in sorted(records, key=lambda rec: -len(rec.seq)):
        by_category[comparisons[rec.description][-1]].append(rec)

    output_base = bve_folder + os.path.basename(args.input_file).split('.fas')[0]

    ### Writes out all of the categorized sequences
    with open(args.input_file.split('NorRNA')[0] + 'WTA_EPU.fasta', 'w') as epu:
        _write_category(output_base + '.Not_Bact.fasta', by_category['EUKARYOTIC'], epu, '_E')
        _write_category(output_base + '.Bact_Hit.fasta', by_category['PROKARYOTIC'], epu, '_P')
        _write_category(output_base + '.Undetermined.fasta', by_category['UNDETERMINED'], epu, '_U')
        # The DivideByZero file is only created when there is something to put in it.
        if by_category['divide by zero']:
            _write_category(output_base + '.DivideByZero.fasta', by_category['divide by zero'], epu, '_U')

    return (str(len(by_category['EUKARYOTIC'])),
            str(len(by_category['PROKARYOTIC'])),
            str(len(by_category['UNDETERMINED'])))


###########################################################################################
###----------------------- Updates Log of Prok vs Euk Comparisons ----------------------###
###########################################################################################

def update_log(args, Euk_Contigs, Prok_Contigs, Und_Contigs):
    """Print the classification summary and append it to the matching log file(s).

    Log files live in ``../PostAssembly_Logs/`` (relative to the working directory);
    a file is updated when its name starts with the input file name up to ``_No`` and
    ends with ``Log.txt``.

    Args:
        args: parsed command-line arguments; only ``args.input_file`` is read.
        Euk_Contigs, Prok_Contigs, Und_Contigs: contig counts, already as strings.
    """
    log_folder = '../PostAssembly_Logs/'
    os.makedirs(log_folder, exist_ok=True)

    input_name = os.path.basename(args.input_file)

    print (color.BOLD +'\n\nThere are '+color.RED+Prok_Contigs+' Strongly Prokaryotic contigs'+color.END\
    +color.BOLD+',\n'+color.ORANGE+Euk_Contigs+' Strongly Eukaryotic contigs'+color.END\
    +color.BOLD+',\nand '+color.PURPLE+Und_Contigs+' Undetermined Contigs\n'+color.END\
    +color.BOLD+'in '+input_name+color.END)

    log_prefix = input_name.split('_No')[0]
    for Logname in os.listdir(log_folder):
        if Logname.startswith(log_prefix) and Logname.endswith('Log.txt'):
            with open(log_folder + Logname, 'a') as Logfilename:
                Logfilename.write('Prokaryotic Contigs\t'+Prok_Contigs+'\tn/a\tn/a\n')
                Logfilename.write('Eukaryotic Contigs\t'+Euk_Contigs+'\tn/a\tn/a\n')
                Logfilename.write('Undetermined Contigs\t'+Und_Contigs+'\tn/a\tn/a\n')


###########################################################################################
###-------------------------------- Next Script Message --------------------------------###
###########################################################################################

def next_script(args):
    """Tell the user where the combined output file is and which script comes next."""
    output_name = os.path.basename(args.input_file).split('NorRNA')[0] + 'WTA_EPU.fasta'
    # Name of the folder that actually holds the input file, whatever the path's shape
    # (relative, absolute, or a bare file name).
    folder_name = os.path.basename(os.path.abspath(_home_folder(args)))

    print (color.BOLD+'\nLook for '+color.DARKCYAN+output_name+color.END+color.BOLD+' in the '\
    +folder_name+' Folder\n\n' + color.END)
    print (color.BOLD + 'Next Script is: ' + color.GREEN + '3_CountOGsDiamond.py\n\n'+ color.END)


##########################################################################################
###--------------------- Cleans up the Folder and Moves Final Files -------------------###
##########################################################################################

def clean_up(args):
    """Copy the ``*WTA_EPU.fasta`` files into ``BvE/`` and move the ``*NorRNA*fasta``
    files into ``rRNA_Removal/``; both folders sit next to the input file and are
    created when missing.

    Args:
        args: parsed command-line arguments; only ``args.input_file`` is read.
    """
    import glob
    import shutil

    home_folder = _home_folder(args)
    bve_folder = os.path.join(home_folder, 'BvE')
    rrna_folder = os.path.join(home_folder, 'rRNA_Removal')
    os.makedirs(bve_folder, exist_ok=True)
    os.makedirs(rrna_folder, exist_ok=True)

    # glob.escape protects folder names that contain glob metacharacters.
    pattern_root = glob.escape(home_folder)

    for path in glob.glob(os.path.join(pattern_root, '*WTA_EPU.fasta')):
        if os.path.isfile(path):
            shutil.copy(path, bve_folder)

    for path in glob.glob(os.path.join(pattern_root, '*NorRNA*fasta')):
        # A full destination path lets an existing file be replaced, as `mv` would do.
        shutil.move(path, os.path.join(rrna_folder, os.path.basename(path)))


##########################################################################################
###--------------- Checks Command Line Arguments and Calls on Functions ---------------###
##########################################################################################

def main():
    """Run the whole prokaryote-vs-eukaryote classification step."""
    diamond_path = check_diamond_path()

    args = check_args()

    prep_folders(args)

    ublast_BvE(args, diamond_path)

    Euk_Contigs, Prok_Contigs, Und_Contigs = compare_hits(args)

    # The log update is currently disabled (the call is left commented out).
    #update_log(args, Euk_Contigs, Prok_Contigs, Und_Contigs)

    clean_up(args)

    next_script(args)

+main() diff --git a/PTL1/Transcriptomes/Scripts/3_CountOGsDiamond.py b/PTL1/Transcriptomes/Scripts/3_CountOGsDiamond.py new file mode 100644 index 0000000..74b54d1 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/3_CountOGsDiamond.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3.5 + +##__Updated__: 16_10_2017 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 3_CountOGsDiamond.py --help + + +########################################################################################## +## This script is intended to classify the STRONGLY Eukaryotic and UNDETERMINED/UNKNOWN ## +## contigs into different OGs (e.g. orthologous gene-families) ## +## ## +## For more info about the OGs, check out: OrthoMCL.org ## +## ## +## Prior to running this script, ensure the following: ## +## ## +## 1. You have assembled your transcriptome and COPIED the 'assembly' file ## +## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ## +## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ## +## 3. 
Removed SSU/LSU sequences from your Fasta File ## +## ## +## E-mail Xyrus (author) for help if needed: maurerax@gmail.com ## +## ## +## Next Script(s) to Run: ## +## 4_StopFrequency.py ## +## ## +########################################################################################## + +import argparse, os, sys, re +from argparse import RawTextHelpFormatter,SUPPRESS +from distutils import spawn +from Bio import SeqIO + + +#------------------------------ Colors For Print Statements ------------------------------# +class color: + PURPLE = '\033[95m' + CYAN = '\033[96m' + DARKCYAN = '\033[36m' + ORANGE = '\033[38;5;214m' + BLUE = '\033[94m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + END = '\033[0m' + +#------------------------------- Main Functions of Script --------------------------------# + +########################################################################################### +###---------------------------- UPDATE DIAMOND PATH BELOW! -----------------------------### +########################################################################################### + ## IF Diamond is IN YOUR PATH then no updating is needed... + +def check_diamond_path(): + + diamond_path = '' + + if diamond_path == '': + diamond_path = spawn.find_executable("diamond") + #diamond_path = '/path/to/diamond' + else: + pass + + if diamond_path == None: + print (color.BOLD + '\n\nPlease open this script and check that you have included'\ + +' the PATH to the'+color.BLUE+' "usearch" '+color.END+color.BOLD+'executable.\n\n'+color.END) + print (color.BOLD+color.BLUE+'LOOK FOR:\n\n'+color.RED\ + +'#------------------------------ UPDATE USEARCH PATH BELOW! 
-------------------------------#'\ + +color.BLUE+'\n\nThis is somewhere around lines 50 - 80...\n\n'+color.END) + + sys.exit() + else: + pass + + return diamond_path + + +########################################################################################### +###--------------------- Parses and Checks Command-Line Arguments ----------------------### +########################################################################################### + +def check_args(): + + parser = argparse.ArgumentParser(description= + color.BOLD + '\n\nThis script will categorize Contigs into'+color.ORANGE+' "Homologous" '\ + +color.END+color.BOLD+'Gene Families (OGs)\nbased on '+color.RED+'OrthoMCL'+color.END\ + +color.BOLD+"'s Gene Family Grouping\n\n\nNotes on this script and "+color.GREEN+\ + 'OrthoMCL Families'+color.END+color.BOLD+' can be found\nat the bottom of '+color.GREEN\ + +'THIS script (3_CountOGsDiamond.py)\n'+color.END+usage_msg(), usage=SUPPRESS, + formatter_class=RawTextHelpFormatter) + + required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END) + + required_arg_group.add_argument('--input_file','-in', action='store', + help=color.BOLD+color.GREEN+'Fasta file of Nucleotide sequences enriched \nwith'\ + ' Eukaryotic protein coding transcripts'+color.END) + required_arg_group.add_argument('--databases','-g', action='store', + help=color.BOLD+color.GREEN+"Path to fasta file with Hook sequences"+color.END) + + optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END) + optional_arg_group.add_argument('--threads','-t', default='2', + help=color.BOLD+color.GREEN+' Number of threads to use for BLAST\n (default = 2)\n'+color.END) + optional_arg_group.add_argument('--evalue','-e', default=1e-5, type = float, + help=color.BOLD+color.GREEN+' Maximum e-value for OG assignment\n (default = 1e-5)\n'+color.END) + optional_arg_group.add_argument('-author', action='store_true', + 
help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END) + + if len(sys.argv[1:]) == 0: + print (parser.description) + print ('\n') + sys.exit() + + args = parser.parse_args() + + quit_eval = return_more_info(args) + if quit_eval > 0: + sys.exit() + + return args + + +########################################################################################### +###------------------------------- Script Usage Message --------------------------------### +########################################################################################### + +def usage_msg(): + return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 3_CountOGsDiamond.py'\ + ' --input_file ../Op_me_Xxma/Op_me_Xxma_WTA_NBU.fasta'+color.END) + + +########################################################################################## +###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------### +########################################################################################## + +def return_more_info(args): + + valid_arg = 0 + + author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\ + ' maurerax@gmail.com\n\n'+color.END) + + if args.author == True: + print (author) + valid_arg += 1 + + if args.input_file != None: + if os.path.isfile(args.input_file) != False: + if args.input_file.split('/')[-1] not in os.listdir('/'.join(args.input_file.split('/')[:-1])): + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + elif args.input_file.endswith('WTA_EPU.fasta') != True: + print (color.BOLD+'\n\nInvalid Fasta File! 
Only Fasta Files that were processed'\ + ' with '+color.GREEN+'2b_remove_Bact.py '+color.END+color.BOLD+'are valid\n\n'\ + 'However, to bypass that issue, Fasta Files MUST end with '+color.CYAN+\ + '"WTA_NBU.fasta"\n\n'+color.END) + valid_arg += 1 + else: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\ + '('+color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\ndoes not'\ + ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END) + valid_arg += 1 + + if os.path.isdir(args.databases + '/db_OG') != True: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\ + +color.ORANGE+'db_OG Folder!\n\n'+color.END+color.BOLD+'Ensure that this folder '\ + 'can be found in the main '+color.ORANGE+'Databases Folder'+color.END+color.BOLD\ + +'\n\nThen try once again\n\n.'+color.END) + valid_arg += 1 + + ogdb_count = 0 + for file in os.listdir(args.databases + '/db_OG'): + if file.endswith('.dmnd'): + ogdb_count += 1 + + if ogdb_count == 0: + print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '\ + 'Diamond formatted '+color.ORANGE+'Gene Family databases!\n\n'+color.END+color.BOLD+\ + 'Ensure that they can be found in the '+color.ORANGE+'db_OG folder'+color.END+\ + color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+\ + color.END+color.BOLD+'\n\nThen try once again.\n\n'+color.END) + valid_arg += 1 + elif ogdb_count > 1: + print('\nMultiple OG databases found. 
Please only provide 1 database in the db_OG folder.\n') + valid_arg += 1 + + return valid_arg + + +########################################################################################### +###--------------------------- Does the Inital Folder Prep -----------------------------### +########################################################################################### + +def prep_folders(args): + + OG_folder = '/'.join(args.input_file.split('/')[:-1]) + '/DiamondOG/' + + if os.path.isdir(OG_folder) != True: + os.system('mkdir '+OG_folder) + + +########################################################################################### +###--------------------- Runs Diamond on Split OrthoMCL Databases ----------------------### +########################################################################################### + +def OG_diamond(args, diamond_path): + + print (color.BOLD+'\nStarting to "BLAST" against OG databases'+color.END) + + OG_folder = '/'.join(args.input_file.split('/')[:-1]) + '/DiamondOG/' + db = [file for file in os.listdir(args.databases + '/db_OG') if file.endswith('.dmnd')][0] + + print (color.BOLD + '\n\n"BLAST"-ing against OG database using DIAMOND: ' + color.DARKCYAN + db + color.END + '\n\n') + + OG_diamond_cmd = diamond_path + ' blastx -q ' + args.input_file + ' -d ' + args.databases + '/db_OG/' + db + ' --evalue ' + str(args.evalue) + ' --threads 60 --subject-cover 0.35 --outfmt 6 -o ' + OG_folder + 'allOGresults.tsv' + + os.system(OG_diamond_cmd) + + +########################################################################################### +###--------------- Keeps the Single BEST Hit (HSP-score) Per Transcript ----------------### +########################################################################################### + +def keep_best(args): + + print (color.BOLD+color.PURPLE+'\n\nProcessing OG-database results to keep only the BEST match for each transcript\n\n'+color.END) + + OG_folder = 
'/'.join(args.input_file.split('/')[:-1]) + '/DiamondOG/' + + inTSV = [i for i in open(OG_folder + 'allOGresults.tsv').readlines()] + + inTSV.sort(key = lambda x: -float(x.split('\t')[-1])) + + keep = [] + for i in inTSV: + if any(i.split('\t')[0] in j for j in keep) != True: + keep.append(i) + + updated_lines = list(set([line.split('\t')[0]+'_'+'_'.join(line.split('\t')[1].split('_')[-2:])+'\t'+'\t'.join(line.split('\t')[1:]) for line in keep])) + + with open(args.input_file.replace('.fasta','.Renamed_allOGCleanresults.tsv'), 'w+') as w: + for i in updated_lines: + w.write(i) + + +########################################################################################### +###-------- Copies and Updates Names of Transcripts With OG Hits to New Fasta ----------### +########################################################################################### + +def update_fasta(args): + + print (color.BOLD+color.PURPLE+'Updating Fasta File Sequence Names with their BEST OG hits\n\n'+color.END) + + Renamed_TSV = args.input_file.replace('.fasta','.Renamed_allOGCleanresults.tsv') + + keep = [i for i in open(Renamed_TSV).readlines() if i != '\n'] + + keep_dict = { } + for line in keep: + try: + og_number = re.split('OG.{1}_', line.split('\t')[1])[1][:6] + og_prefix = line.split('\t')[1].split(og_number)[0][-4:] + og = og_prefix + og_number + + keep_dict.update({ re.split('_OG.{1}_', line.split('\t')[0])[0] : re.split('_OG.{1}_', line.split('\t')[0])[0] + '_' + og_prefix + line.split('\t')[1].split('_')[-1] }) + except IndexError: + pass + + inFasta = [i for i in SeqIO.parse(args.input_file,'fasta')] + + updated_seq_name = ['>'+keep_dict[i.description]+'\n'+str(i.seq)+'\n' for i in inFasta if i.description in keep_dict.keys()] + + seqs_without_OG = ['>'+i.description+'\n'+str(i.seq)+'\n' for i in inFasta if i.description not in keep_dict.keys()] + + with open(args.input_file.replace('.fasta','.Renamed.fasta'),'w+') as w: + for i in updated_seq_name: + w.write(i) + + with 
open(args.input_file.replace('.fasta','.LackOG.fasta'),'w+') as x: + for i in seqs_without_OG: + x.write(i) + + +########################################################################################### +###-------------------- Updates Log With OG Assignment Information ---------------------### +########################################################################################### + +def update_log(args): + + if os.path.isdir('../PostAssembly_Logs/') != True: + os.system('mkdir ../PostAssembly_Logs/') + else: + pass + + home_folder = '/'.join(args.input_file.split('/')[:-1]) + '/' + + Renamed_TSV = home_folder+args.input_file.split('/')[-1].replace('.fasta','.Renamed_allOGCleanresults.tsv') + + keep = [line for line in open(Renamed_TSV).readlines()] + all_ogs = [line.split('\t')[1].split('_')[-1] for line in keep if len(re.split('_OG.{1}_', line.split('\t')[1])) > 1] + + total_with_ogs = str(len(all_ogs)) + unique_ogs = str(len(set(all_ogs))) + + print (color.BOLD +'There are '+color.BLUE +total_with_ogs+' Contigs'+color.END\ + +color.BOLD+' that hit '+color.DARKCYAN+unique_ogs+' Unique OGs\n'+color.END) + + + for Logname in os.listdir(os.curdir+'./PostAssembly_Logs/'): + if Logname.startswith(args.input_file.split('/')[2].split('_WTA')[0]) and Logname.endswith('Log.txt'): + with open('../PostAssembly_Logs/'+Logname,'a') as LogFile: + LogFile.write('Contigs With OG\t'+total_with_ogs+'\tn/a\tn/a\n') + LogFile.write('Unique OGs\t'+unique_ogs+'\tn/a\tn/a\n') + + +########################################################################################## +###--------------------- Cleans up the Folder and Moves Final Files -------------------### +########################################################################################## + +def clean_up(args): + + OG_folder = '/'.join(args.input_file.split('/')[:-1]) + '/DiamondOG/' + + os.system('rm ' + args.input_file) + + os.system('cp ' + args.input_file.replace('.fasta','.Renamed.fasta') + ' ' + OG_folder) + + 
os.system('cp ' + args.input_file.replace('.fasta','.Renamed_allOGCleanresults.tsv') + ' ' + OG_folder) + + +########################################################################################### +###-------------------------------- Next Script Message --------------------------------### +########################################################################################### + +def next_script(args): + + home_folder = '../'+args.input_file.split('/')[1]+'/' + + print (color.BOLD+'\nLook for '+color.DARKCYAN+args.input_file.split('/')[-1]\ + .replace('.fasta','WTA_EPU.fasta')+color.END+color.BOLD+' in the '+home_folder\ + +' Folder\n\n' + color.END) + + print (color.BOLD+'Next Script is: '+color.GREEN+'4_InFrameStopFreq.py\n\n'+ color.END) + + +########################################################################################## +###--------------- Checks Command Line Arguments and Calls on Functions ---------------### +########################################################################################## + +def main(): + + usearch_path = check_diamond_path() + + args = check_args() + + prep_folders(args) + + OG_diamond(args, usearch_path) + + keep_best(args) + + update_fasta(args) + + #update_log(args) + + clean_up(args) + + next_script(args) + +main() diff --git a/PTL1/Transcriptomes/Scripts/4_InFrameStopFreq.py b/PTL1/Transcriptomes/Scripts/4_InFrameStopFreq.py new file mode 100644 index 0000000..e5def97 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/4_InFrameStopFreq.py @@ -0,0 +1,790 @@ +#!/usr/bin/env python + +##__Updated__: 18_08_2017 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 4_InFrameStopFreq.py --help + + +########################################################################################## +## This script is intended to aid in identifying the genetic code of the data given ## +## ## +## Prior to running this script, ensure the following: ## +## ## +## 1. 
##     You have assembled your transcriptome and COPIED the 'assembly' file             ##
##     (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder                   ##
##  2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py ##
##  3. Removed SSU/LSU sequences from your Fasta File                                   ##
##  4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined     ##
##  5. Classified the Non-Strongly Prokaryotic sequences into OGs                       ##
##                                                                                      ##
##                               COMMAND Example Below                                  ##
##                           Extra Notes at Bottom of Script                            ##
##                                                                                      ##
##           E-mail Xyrus (author) for help if needed: maurerax@gmail.com               ##
##                                                                                      ##
##                               Next Script(s) to Run:                                 ##
##                                 5_GCodeTranslate.py                                  ##
##                                                                                      ##
##########################################################################################


import argparse, os, sys
from argparse import RawTextHelpFormatter,SUPPRESS

try:
	from distutils import spawn
except ImportError:
	# distutils is no longer part of the standard library on Python 3.12+.
	# Provide a minimal stand-in so `spawn.find_executable(...)` keeps working.
	import shutil

	class spawn(object):
		"""Fallback exposing only the one distutils.spawn function this script uses."""
		find_executable = staticmethod(shutil.which)

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Data.CodonTable import CodonTable


#-------------------------- Set-up Codon Tables (Genetic Codes) --------------------------#

# Genetic code in which TAG is the ONLY stop codon (TAA and TGA are read as Q).
tag_table = CodonTable(forward_table={
	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
	'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Q',
	'TGT': 'C', 'TGC': 'C', 'TGA': 'Q', 'TGG': 'W',
	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
	start_codons = [ 'ATG'],
	stop_codons = ['TAG'])

# Genetic code in which TAA is the ONLY stop codon (TAG and TGA are read as Q).
c_uncinata_table = CodonTable(forward_table={
	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
	'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
	'TAT': 'Y', 'TAC': 'Y', 'TAG': 'Q',
	'TGT': 'C', 'TGC': 'C', 'TGA': 'Q', 'TGG': 'W',
	'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
	'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
	'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
	'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
	'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
	'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
	'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
	'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
	'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
	'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
	'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
	'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'},
	start_codons = [ 'ATG'],
	stop_codons = ['TAA'])

#------------------------------ Colors For Print Statements ------------------------------#
class color:
	"""ANSI escape sequences used to colour/emphasise terminal messages."""
	PURPLE = '\033[95m'
	CYAN = '\033[96m'
	DARKCYAN = '\033[36m'
	ORANGE = '\033[38;5;214m'
	BLUE = '\033[94m'
	GREEN = '\033[92m'
	YELLOW = '\033[93m'
	RED = '\033[91m'
	BOLD = '\033[1m'
	UNDERLINE = '\033[4m'
	END = '\033[0m'   # resets every attribute set above

#------------------------------- Main Functions of Script --------------------------------#

###########################################################################################
###---------------------------- UPDATE DIAMOND PATH BELOW! -----------------------------###
###########################################################################################
	## IF Diamond is IN YOUR PATH then no updating is needed...
def check_diamond_path():
	"""Locate the ``diamond`` executable, exiting with instructions if it is missing.

	Returns:
		str: the hard-coded path below when one has been filled in, otherwise
		the first ``diamond`` found on PATH.
	"""
	import shutil  # local import so this function does not depend on distutils

	# Fill this in ONLY if diamond is not on your PATH.
	diamond_path = ''
	#diamond_path = '/path/to/diamond'

	if not diamond_path:
		diamond_path = shutil.which('diamond')

	if diamond_path is None:
		print(color.BOLD + '\n\nPlease open this script and check that you have included'
			+ ' the PATH to the' + color.BLUE + ' "diamond" ' + color.END + color.BOLD
			+ 'executable.\n\n' + color.END)
		print(color.BOLD + color.BLUE + 'LOOK FOR:\n\n' + color.RED
			+ '#------------------------------ UPDATE DIAMOND PATH BELOW! -------------------------------#'
			+ color.BLUE + '\n\nThis is somewhere around lines 50 - 80...\n\n' + color.END)
		sys.exit()

	return diamond_path


###########################################################################################
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
###########################################################################################

def check_args():
	"""Build the argument parser, parse ``sys.argv`` and validate the result.

	Prints the description and exits when called without arguments; exits when
	``return_more_info`` reports any problem.

	Returns:
		argparse.Namespace: with ``input_file``, ``databases`` and ``author``.
	"""
	description = (
		color.BOLD + '\n\nThis script is intended to ' + color.RED + 'AID You ' + color.END + color.BOLD
		+ 'in determining the ' + color.RED + '\nLikely Genetic Code' + color.END + color.BOLD + ' of a'
		' given Fasta File of transcripts\n\nInterpretation of the output (StopFreq.tsv) is difficult \nand so '
		+ color.ORANGE + 'TWO EXAMPLES' + color.END + color.BOLD + ' can be found in the ' + color.CYAN
		+ 'NOTES Section' + color.END + color.BOLD + ' of\nTHIS Script ' + color.GREEN
		+ '(4_InFrameStopFreq.py)\n' + color.END + usage_msg())

	parser = argparse.ArgumentParser(description=description, usage=SUPPRESS,
		formatter_class=RawTextHelpFormatter)

	required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

	required_arg_group.add_argument('--input_file','-in', action='store', required=True,
		help=color.BOLD+color.GREEN+'Fasta file of Nucleotide sequences enriched \nwith'
		' Eukaryotic protein coding transcripts'+color.END)
	# Not flagged required=True here; a missing value is reported by return_more_info.
	required_arg_group.add_argument('--databases','-d', action='store',
		help=color.BOLD+color.GREEN+"Path to databases"+color.END)

	optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)
	optional_arg_group.add_argument('-author', action='store_true',
		help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)

	if not sys.argv[1:]:
		print(parser.description)
		print('\n')
		sys.exit()

	args = parser.parse_args()

	if return_more_info(args) > 0:
		sys.exit()

	return args


###########################################################################################
###------------------------------- Script Usage Message --------------------------------###
###########################################################################################

def usage_msg():
	"""Return the coloured example command line appended to the help text."""
	return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 4_InFrameStopFreq.py'
		' --input_file ../Op_me_Xxma/Op_me_Xxma_WTA_EPU.Renamed.fasta'+color.END)


##########################################################################################
###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
##########################################################################################

def return_more_info(args):
	"""Validate parsed arguments, printing a message for every problem found.

	Args:
		args: namespace produced by ``check_args``'s parser.

	Returns:
		int: number of reasons the script should stop (0 means "carry on").
		Printing the author information counts as a reason to stop.
	"""
	valid_arg = 0

	author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'
		' maurerax@gmail.com\n\n'+color.END)

	if args.author:
		print(author)
		valid_arg += 1

	if args.input_file is not None:
		fasta_name = args.input_file.split('/')[-1]
		missing_msg = (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '
			'('+color.DARKCYAN+fasta_name+color.END+color.BOLD+')\ndoes not'
			' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)

		if not os.path.isfile(args.input_file):
			print(missing_msg)
			valid_arg += 1
		# A bare filename has an empty directory part; list '.' instead of ''
		# (os.listdir('') raises).
		elif fasta_name not in os.listdir('/'.join(args.input_file.split('/')[:-1]) or '.'):
			print(missing_msg)
			valid_arg += 1
		elif not args.input_file.endswith('WTA_EPU.Renamed.fasta'):
			# The message names the same suffix that is tested above.
			print(color.BOLD+'\n\nInvalid Fasta File! Only Fasta Files that were processed'
				' with '+color.GREEN+'3_CountOGsDiamond.py '+color.END+color.BOLD+'are valid\n\n'
				'However, to bypass that issue, Fasta Files MUST end with '+color.CYAN+
				'"WTA_EPU.Renamed.fasta"\n\n'+color.END)
			valid_arg += 1

	if args.databases is None:
		# --databases is optional for argparse, so guard before building paths from it.
		print(color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' No '+color.ORANGE
			+'Databases Folder'+color.END+color.BOLD+' was provided (use --databases).\n\n'+color.END)
		valid_arg += 1

	elif not os.path.isdir(os.path.join(args.databases, 'db_StopFreq')):
		print(color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '
			+color.ORANGE+'db_StopFreq Folder!\n\n'+color.END+color.BOLD+'Ensure that this folder '
			'can be found in the main '+color.ORANGE+'Databases Folder'+color.END+color.BOLD
			+'\n\nThen try once again\n\n.'+color.END)
		valid_arg += 1

	elif not os.path.isfile(os.path.join(args.databases, 'db_StopFreq', 'RepEukProts.dmnd')):
		print(color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Cannot find the '
			'Diamond formatted '+color.ORANGE+'Representative Eukaryotic Protein Database!\n\n'+color.END+color.BOLD+
			'Ensure that it can be found in the '+color.ORANGE+'db_StopFreq folder'+color.END+
			color.BOLD+',\nwhich can be found in the main '+color.ORANGE+'Databases Folder'+
			color.END+color.BOLD+'\n\nThen try once again.\n\n'+color.END)
		valid_arg += 1

	return valid_arg


###########################################################################################
###--------------------------- Does the Inital Folder Prep -----------------------------###
###########################################################################################

def prep_folders(args):
	"""Create the StopCodonFreq folder tree next to the taxon folder.

	The taxon folder is taken from the second '/'-separated component of
	``args.input_file`` (e.g. ``../Taxon/file.fasta`` -> ``../Taxon/``).

	Returns:
		str: path of the ``StopCodonFastas/`` sub-folder (with trailing slash).
	"""
	Stop_folder = '../'+args.input_file.split('/')[1]+'/StopCodonFreq/'

	for sub_folder in ('StopCodonFastas', 'SpreadSheets'):
		os.makedirs(Stop_folder+sub_folder, exist_ok=True)

	return Stop_folder+'StopCodonFastas/'


###########################################################################################
###--------------------- Translates Sequences with Each Stop Codon ---------------------###
###########################################################################################

def _parse_hit_coordinates(tsv_rows):
	"""Map each query name to ``[strand, start, end, ...]`` from the OG hit table.

	Columns 7 and 8 (index 6 and 7) of each tab-separated row are the
	query start and end. 'F' is recorded when start < end, 'RC' when end < start.
	Repeated query names extend the same list, as before.
	"""
	prot_dict = {}
	for row in tsv_rows:
		fields = row.split('\t')
		q_start, q_end = int(fields[6]), int(fields[7])
		entry = prot_dict.setdefault(fields[0], [])
		if q_start < q_end:
			# 0-based start; the end is pushed 3 nt past the alignment.
			entry.extend(['F', q_start - 1, q_end + 3])
		elif q_end < q_start:
			# Thresholds kept exactly as in the original implementation.
			entry.extend(['RC', q_start, q_end if (q_end - 4) < 5 else q_end - 4])
	return prot_dict


def _orf_for_hit(seq_rec, strand, start, end, table):
	"""Return the ORF of one contig under one genetic code.

	The contig is translated from the hit start in the hit's orientation. If a
	stop is met the ORF runs up to and including that stop codon; otherwise the
	slice between the recorded start and end is returned.
	"""
	if strand == 'F':
		template = seq_rec.seq[start:]
	else:
		template = seq_rec.seq[:start].reverse_complement()

	protein = str(template.translate(table=table))
	if '*' in protein:
		return template[:(protein.index('*') + 1) * 3]

	if strand == 'F':
		return seq_rec.seq[start:end]
	return seq_rec.seq[end:start].reverse_complement()


def _append_orfs(prot_dict, records, table):
	"""Append, for every hit, the ORF found with ``table`` to its ``prot_dict`` list.

	Contigs are matched to hits by substring (query name contained in the
	FASTA description), exactly as the original nested loops did.
	"""
	for key, value in prot_dict.items():
		if not value:
			# start == end in the hit table: no orientation was recorded.
			continue
		strand, start, end = value[0], value[1], value[2]
		for seq_rec in records:
			if key in seq_rec.description:
				value.append(_orf_for_hit(seq_rec, strand, start, end, table))


def prep_translations(args):
	"""Write ORF and protein FASTA files for each single-stop-codon genetic code.

	Reads ``<input>_allOGCleanresults.tsv`` (the file name is ``args.input_file``
	with '.fasta' replaced) and the input FASTA, then writes
	``<prefix>_{taa,tga,tag}_ORF.fasta`` and ``..._ORF.aa.fasta``, where
	``<prefix>`` is ``args.input_file`` up to the first '.fas'.
	"""
	print(color.BOLD+'\nIdentifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py\n'+color.END)

	with open(args.input_file.replace('.fasta','_allOGCleanresults.tsv')) as tsv_handle:
		intsv = [row for row in tsv_handle if row != '\n']

	inFasta = list(SeqIO.parse(args.input_file,'fasta'))

	prot_dict = _parse_hit_coordinates(intsv)

	# Order matters: each pass appends one ORF per hit, and the writers below
	# read them back from the END of each list (-3 = TAA, -2 = TGA, -1 = TAG).
	genetic_codes = [('TAA', c_uncinata_table), ('TGA', 6), ('TAG', tag_table)]

	for stop, table in genetic_codes:
		print(color.BOLD+'\n\nTranslating DNA using'+color.RED+' '+stop+color.END
			+color.BOLD+' as the sole STOP codon\n'+color.END)
		_append_orfs(prot_dict, inFasta, table)

	#------------ Parsing through data to maintain OG assignments ------------#
	tagged = []
	for row in intsv:
		fields = row.split('\t')
		entry = fields[0]+';'+fields[1][-10:]
		if 'no_group' in entry.split(';')[1]:
			entry = entry.split(';')[0]+';no_group'
		tagged.append(entry)

	# De-duplicate, then index by query name so writing is a lookup instead of
	# a scan over every OG entry for every hit.
	og_tags = {}
	for entry in dict.fromkeys(tagged):
		og_tags.setdefault(entry.split(';')[0], []).append(entry.split(';')[1])

	#------------------- Write one ORF + protein file per code -------------------#
	prefix = args.input_file.split('.fas')[0]

	for offset, (stop, table) in enumerate(genetic_codes):
		position = offset - len(genetic_codes)

		print(color.BOLD+'\n\nWriting FASTA files with ORF and Protein sequences with'+color.RED
			+' '+stop+' '+color.END+color.BOLD+'as only STOP codon\n'+color.END)

		nuc_path = prefix+'_'+stop.lower()+'_ORF.fasta'
		aa_path = prefix+'_'+stop.lower()+'_ORF.aa.fasta'

		with open(nuc_path,'w') as nuc_out, open(aa_path,'w') as aa_out:
			for key, value in prot_dict.items():
				if len(value) < 4:
					# No contig matched this hit, so there is no ORF to write.
					continue
				orf = str(value[position])
				for og in og_tags.get(key, ()):
					header = '>'+key+'_'+og+'\n'
					nuc_out.write(header+orf.upper()+'\n')
					aa_out.write(header+str(Seq(orf).translate(table=table)).upper()+'\n')


###########################################################################################
###---------- Diamonds the Translations Against a SMALL Euk Protein Database ----------###
###########################################################################################

def diamond_ProtDB(args, diamond_path, threads=60):
	"""Run ``diamond blastp`` for the TAG, TGA and TAA protein files.

	Each search goes against ``<databases>/db_StopFreq/RepEukProts.dmnd`` and
	writes ``<prefix>_<code>_ORF.RepEukProts.tsv`` (tabular format 6).

	Args:
		args: parsed arguments (``input_file`` and ``databases`` are used).
		diamond_path: path to the diamond executable.
		threads: value passed to ``--threads`` (60, the previous fixed value).
	"""
	import subprocess  # local import; arguments are passed as a list, not a shell string

	prefix = args.input_file.split('.fas')[0]
	database = args.databases + '/db_StopFreq/RepEukProts.dmnd'

	for code in ('tag', 'tga', 'taa'):
		status = subprocess.call([
			diamond_path, 'blastp',
			'-q', prefix + '_' + code + '_ORF.aa.fasta',
			'-d', database,
			'--evalue', '1e-5',
			'--max-target-seqs', '1',
			'--threads', str(threads),
			'--outfmt', '6',
			'-o', prefix + '_' + code + '_ORF.RepEukProts.tsv'])
		if status != 0:
			print(color.BOLD+color.RED+'\nWarning:'+color.END+color.BOLD+' diamond exited with status '
				+str(status)+' for the '+code.upper()+' translations\n'+color.END)


###########################################################################################
###-------------------- Manages the search for In-Frame Stop Codons --------------------###
###########################################################################################

def _load_terminated_orfs(fasta_path, stop_codon):
	"""Return the records of ``fasta_path`` whose sequence ends with ``stop_codon``."""
	return [rec for rec in SeqIO.parse(fasta_path,'fasta') if str(rec.seq).endswith(stop_codon)]


def _load_hit_ranges(tsv_path, legacy_suffix):
	"""Read a diamond tabular report into ``{query: [start, end, ...]}``.

	The values are the 6th- and 5th-from-last columns of each row (query start
	and end in the 12-column format 6 layout), in protein coordinates.
	``legacy_suffix`` is stripped from the query name, as before.
	"""
	hit_ranges = {}
	with open(tsv_path) as handle:
		for row in handle.read().split('\n'):
			if row == '':
				continue
			fields = row.split('\t')
			hit_ranges.setdefault(fields[0].replace(legacy_suffix,''),[]).extend(
				[int(fields[-6]), int(fields[-5])])
	return hit_ranges


def _count_inframe_stops(records, hit_ranges):
	"""Count TAG/TGA/TAA codons inside the aligned part of each ORF.

	Records are anything with ``description`` and ``seq``. For a record whose
	description is a key of ``hit_ranges``, codons are scanned from the first
	aligned residue up to, but excluding, the last aligned residue.

	Returns:
		tuple: ``(counts, codons, seqs)`` - per-codon counts keyed 'TAG', 'TGA'
		and 'TAA', number of codons scanned, number of records that had a hit.
	"""
	counts = {'TAG': 0, 'TGA': 0, 'TAA': 0}
	codons = 0
	seqs = 0
	for rec in records:
		span = hit_ranges.get(rec.description)
		if span is None:
			# No database hit for this ORF (previously hidden by a bare except).
			continue
		sequence = str(rec.seq).upper()
		for n in range((span[0] - 1) * 3, (span[1] * 3) - 3, 3):
			codon = sequence[n:n+3]
			if codon in counts:
				counts[codon] += 1
			codons += 1
		seqs += 1
	return counts, codons, seqs


def _stats_row(label, seqs, counts, codons):
	"""Format one line of the stats table; densities are per 1000 codons.

	When no codons were scanned the whole row is zeros, so no division happens.
	"""
	if codons == 0:
		return label+'\t0\t0\t0\t0\t0\t0\t0\t0\n'
	order = ('TAG', 'TGA', 'TAA')
	cells = [label, str(seqs)] + [str(counts[c]) for c in order] + [str(codons)]
	cells += ["%.2f" % ((float(counts[c])*1000)/float(codons)) for c in order]
	return '\t'.join(cells)+'\n'


def hunt_for_stops(args):
	"""Measure in-frame stop-codon densities and write ``<prefix>_StopCodonStats.tsv``.

	For each single-stop-codon translation (TGA, TAG, TAA) the ORFs that end in
	that stop are paired with their diamond hit and the three stop codons are
	counted inside the aligned region. A per-code row, a Summary row and the
	number of '>' headers in the input FASTA are written out.
	"""
	prefix = args.input_file.split('.fas')[0]

	# (stop codon, file tag, legacy suffix stripped from report query names)
	layouts = [('TGA','tga','_Ciliate'), ('TAG','tag','_TAG'), ('TAA','taa','_Chilo')]

	#------------------------ Open Fasta Files ------------------------#
	try:
		orf_sets = {stop: _load_terminated_orfs(prefix+'_'+tag+'_ORF.fasta', stop)
			for stop, tag, _ in layouts}
	except (IOError, OSError):
		print(color.BOLD+color.RED+'\n\nMissing Necessary Inputs: Open Script for Usage'
			' Information\n\n'+color.END)
		sys.exit()

	print(color.BOLD+'\n\nGathering Sequence information from FASTA and TSV files\n'+color.END)

	#----------------------- Open BLAST Reports -----------------------#
	hit_sets = {stop: _load_hit_ranges(prefix+'_'+tag+'_ORF.RepEukProts.tsv', legacy)
		for stop, tag, legacy in layouts}

	with open(args.input_file) as fasta_handle:
		total_seq_counts = fasta_handle.read().count('>')

	#-------- Gathering In-frame Stop Codon Density Information --------#
	rows = []
	totals = {'TAG': 0, 'TGA': 0, 'TAA': 0}
	total_codons = 0
	total_seqs = 0

	for stop, _, _ in layouts:
		print(color.BOLD+'\nCollecting in-frame stop codon information when'+color.RED
			+' '+stop+color.END+color.BOLD+' is the only STOP\n'+color.END)

		counts, codons, seqs = _count_inframe_stops(orf_sets[stop], hit_sets[stop])
		rows.append(_stats_row(stop, seqs, counts, codons))

		for codon in totals:
			totals[codon] += counts[codon]
		total_codons += codons
		total_seqs += seqs

	#-------------- Writing Data Out and Print Statement --------------#
	with open(prefix+'_StopCodonStats.tsv','w') as w:
		w.write('Stop Codon\tNumber of Seqs Analyzed\tIn-frame TAG\tIn-frame TGA\tIn-frame TAA\tTotal Codons\tIn-frame TAG density\tIn-frame TGA density\tIn-frame TAA density\n')
		w.writelines(rows)
		w.write('\n \n')
		w.write(_stats_row('Summary', total_seqs, totals, total_codons))
		w.write('\nTotal Seqs in Fasta\t'+str(total_seq_counts))


##########################################################################################
###--------------------- Cleans up the Folder and Moves Final Files -------------------###
##########################################################################################

def clean_up(args):
	"""Move the ORF FASTA files and diamond reports into ``StopCodonFreq/``.

	``<prefix>_t*_ORF.*fasta`` goes to ``StopCodonFreq/StopCodonFastas/`` and
	``<prefix>_t*Prots.tsv`` to ``StopCodonFreq/SpreadSheets/``, both created
	beside the input file if needed. Files already there are replaced.
	"""
	import glob
	import shutil

	# A bare filename means "current folder", not the filesystem root.
	home_folder = os.path.dirname(args.input_file) or '.'
	prefix = args.input_file.split('.fas')[0]

	destinations = (
		('_t*_ORF.*fasta', os.path.join(home_folder, 'StopCodonFreq', 'StopCodonFastas')),
		('_t*Prots.tsv', os.path.join(home_folder, 'StopCodonFreq', 'SpreadSheets')),
	)

	for pattern, folder in destinations:
		os.makedirs(folder, exist_ok=True)
		for path in glob.glob(glob.escape(prefix) + pattern):
			# Naming the full target keeps `mv` semantics (overwrite on re-run).
			shutil.move(path, os.path.join(folder, os.path.basename(path)))


###########################################################################################
###-------------------------------- Next Script Message --------------------------------###
###########################################################################################

def next_script(args):
	"""Tell the user where the stats table is and which script to run next."""
	home_folder = '/'.join(args.input_file.split('/')[:-1])
	stats_name = args.input_file.split('/')[-1].replace('.fasta','_StopCodonStats.tsv')

	print(color.BOLD+'\nLook for '+color.DARKCYAN+stats_name+color.END+color.BOLD
		+' in the '+home_folder+' Folder\n\n' + color.END)

	print(color.BOLD+'Next Script is: '+color.GREEN+'5_GCodeTranslate.py\n\n'+ color.END)


##########################################################################################
###--------------- Checks Command Line Arguments and Calls on Functions ---------------###
##########################################################################################

def main():
	"""Run the whole stop-codon frequency workflow for one input FASTA."""
	diamond_path = check_diamond_path()

	args = check_args()

	prep_translations(args)

	diamond_ProtDB(args, diamond_path)

	hunt_for_stops(args)

	clean_up(args)

	next_script(args)

if __name__ == '__main__':
	main()


#----------------------------------------- NOTES -----------------------------------------#
#
# This script is designed to HELP you make an informed decision about the genetic code being
# used by your particular organism. Be aware that it will be limited by the quality of the
# data given to it!
#
# You will need:
#
# Diamond, BioPython, AND the output from '3_CountOGSDiamond.py'
#
# The protein database is read from <databases>/db_StopFreq/RepEukProts.dmnd, where
# <databases> is the folder given with --databases
#
# katzlab$ python 4_InFrameStopFreq.py --input_file YourFastaFile.fasta --databases <databases>
#
#
#------------------------------- Interpretation of Results -------------------------------#
#
# FORMATTED BELOW WITH TEXTWRANGLER...
#
# Example output using CILIATE (TGA) genetic Code (NOTE THE In-Frame Densities):
#
# Stop Codon	Number_of_Seqs_Analyzed	In-frame TAG	In-frame TGA	In-frame TAA	Total Codons	In-frame TAG density	In-frame TGA density	In-frame TAA density
# TGA	341	14	0	22	113156	1.2	0	0.92
# TAG	424	0	0	34	140085	0	0	0.78
# TAA	205	14	0	0	16714	0.84	0	0
# Summary	970	28	0	56	269955	2.04	0	1.7
#
# VALUES in summary line (OR SUM of Density) that are > 1.5 likely indicate that the STOP
# codon has been reassigned...
in the case above, TAG and TAA look like they have been +# reassigned. +# +# +# Example output using UNIVERSAL genetic Code (NOTE THE In-Frame Densities): +# +# Stop Codon Number_of_Seqs_Analyzed In-frame TAG In-frame TGA In-frame TAA Total Codons In-frame TAG density In-frame TGA density In-frame TAA density +# TGA 341 1 0 2 113156 0.2 0 0.05 +# TAG 424 0 2 4 140085 0 0 0.08 +# TAA 205 1 0 0 16714 0.04 0 0 +# Summary 970 2 2 6 269955 0.15 0 0.06 +# +# VALUES in summary line (OR SUM of Density) that are > 0.5 likely indicate that the STOP +# codon still acts as STOP... in the case above, TAG, TGA and TAA look like they still behave +# as a stop codon. +# +# THIS IS A ROUGH GUIDE FOR INTERPRETING THE RESULTS!!!! BE VERY VERY WARY! NUMBER OF TOTAL +# SEQUENCES AND TOTAL CODONS OBSERVED ARE IMPORTANT (TOO FEW AND ANY INTERPRETATION IS DEVOID +# OF ANY MEANING). diff --git a/PTL1/Transcriptomes/Scripts/5_GCodeTranslate.py b/PTL1/Transcriptomes/Scripts/5_GCodeTranslate.py new file mode 100644 index 0000000..4ea3c08 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/5_GCodeTranslate.py @@ -0,0 +1,770 @@ +#!/usr/bin/env python3.5 + +##__Updated__: 20_09_2017 +##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com +##__Usage__: python 5_GCodeTranslate.py --help + + +########################################################################################## +## This script is intended to aid in identifying the genetic code of the data given ## +## ## +## Prior to running this script, ensure the following: ## +## ## +## 1. You have assembled your transcriptome and COPIED the 'assembly' file ## +## (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder ## +## 2. Removed small sequences (usually sequences < 300bp) with 1_ContigFiltStats.py ## +## 3. Removed SSU/LSU sequences from your Fasta File ## +## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined ## +## 5. Classified the Non-Strongly Prokaryotic sequences into OGs ## +## 6. 
You either know (or have inferred) the genetic code of the organism ## +## ## +## E-mail Xyrus (author) for help if needed: maurerax@gmail.com ## +## ## +## Next Script(s) to Run: ## +## 6_FilterPatials.py (in FinalizeTranscripts Folder) ## +## ## +########################################################################################## + +import argparse, os, re, sys +from argparse import RawTextHelpFormatter,SUPPRESS + +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.Data.CodonTable import CodonTable + + +#-------------------------- Set-up Codon Tables (Genetic Codes) --------------------------# + +blepharisma_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', + 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['TAA','TAG']) + +condylostoma_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Q', 'TAG': 'Q', + 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 
'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['']) + +c_uncinata_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'Q', + 'TGT': 'C', 'TGC': 'C', 'TGA': 'Q', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['TAA']) + +euplotes_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', + 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 
'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['TAA','TAG']) + +myrionecta_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', 'TAA': 'Y', 'TAG': 'Y', + 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['TGA']) + +no_stop_table = CodonTable(forward_table={ + 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', + 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', + 'TAT': 'Y', 'TAC': 'Y', 'TAA': 'X', 'TAG': 'X', + 'TGT': 'C', 'TGC': 'C', 'TGA': 'X', 'TGG': 'W', + 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', + 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', + 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', + 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}, + start_codons = [ 'ATG'], + stop_codons = ['']) + +peritrich_table = 
class color:
    """ANSI escape sequences used to colour the script's terminal messages.

    Each attribute is a raw escape string meant to be concatenated in front of
    the text it styles; ``END`` resets all styling.
    """
    # The original class assigned PURPLE twice ('\033[95m', then '\033[94m'),
    # so the first value was silently discarded and no BLUE existed.  The
    # second code is exposed as BLUE, matching the colour class used in
    # 6_FilterPartials.py.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    ORANGE = '\033[38;5;214m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'
def return_more_info(args):
    """Print requested informational text and validate the parsed arguments.

    Handles the ``--list_codes`` and ``-author`` flags, rejects unsupported
    genetic codes, and checks that the input Fasta file exists and carries the
    expected ``WTA_EPU.Renamed.fasta`` suffix.

    Args:
        args: the ``argparse`` namespace produced in ``check_args``; the
            attributes read are ``genetic_code``, ``list_codes``, ``author``
            and ``input_file``.

    Returns:
        int: number of reasons the caller should stop (0 means "carry on").
    """

    valid_arg = 0

    # 'myrionecta' is accepted by standardize_gcode() (same table as
    # 'mesodinium'), so it has to pass validation here as well.
    supported_gcodes_names = ['bleph','blepharisma','chilo','chilodonella','condy',\
    'condylostoma','none','eup','euplotes','peritrich','vorticella','ciliate','universal',\
    'taa','tag','tga','mesodinium','myrionecta']

    supported_gcodes_list = ['Blepharisma\t(TGA = W)','Chilodonella\t(TAG/TGA = Q)','Ciliate\t\t(TAR = Q)',\
    'Condylostoma\t(TAR = Q, TGA = W)','Euplotes\t(TGA = C)','Mesodinium\t(TAR = Y)','Peritrich\t(TAR = E)',\
    'None\t\t(TGA/TAG/TAA = X)','Universal\t(TGA/TAG/TAA = STOP)','TAA\t\t(TAG/TGA = Q)', 'TAG\t\t(TRA = Q)',\
    'TGA\t\t(TAR = Q)']

    author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
    ' maurerax@gmail.com\n\n'+color.END)

    if args.genetic_code is not None and args.genetic_code.lower() not in supported_gcodes_names:
        print (color.BOLD+color.RED+'\nProvided genetic code is currently unsupported.\n\n'\
        'If you have a new genetic code, please contact the author (with some evidence).\n\n'\
        'Otherwise, use one of the currently supported genetic codes.\n'+color.END)
        print (color.BOLD+color.ORANGE+'\n'.join(supported_gcodes_list)+'\n\n'+color.END)
        print (author)
        valid_arg += 1
    elif args.list_codes:
        print (color.BOLD+color.RED+'\nThese are the currently supported genetic codes.\n'+color.END)
        print (color.BOLD+color.ORANGE+'\n'.join(supported_gcodes_list)+'\n\n'+color.END)
        valid_arg += 1

    if args.author:
        print (author)
        valid_arg += 1

    if args.input_file is not None:
        fasta_name = args.input_file.split('/')[-1]
        missing_msg = (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided Fasta file '\
        '('+color.DARKCYAN+fasta_name+color.END+color.BOLD+')\ndoes not'\
        ' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)

        if os.path.isfile(args.input_file):
            # A bare file name has no directory part; listing '' raises
            # FileNotFoundError, so fall back to the current directory.
            fasta_folder = os.path.dirname(args.input_file) or '.'
            if os.path.basename(args.input_file) not in os.listdir(fasta_folder):
                print (missing_msg)
                valid_arg += 1
            elif not args.input_file.endswith('WTA_EPU.Renamed.fasta'):
                print (color.BOLD+'\n\nInvalid Fasta File! Only Fasta Files that were processed'\
                ' with '+color.GREEN+'3_CountOGsDiamond.py '+color.END+color.BOLD+'are valid\n\n'\
                'However, to bypass that issue, Fasta Files MUST end with '+color.CYAN+\
                '"WTA_EPU.Renamed.fasta"\n\n'+color.END)
                valid_arg += 1
        else:
            print (missing_msg)
            valid_arg += 1

    elif valid_arg == 0:
        # Nothing informational was requested and there is no input file: the
        # caller would otherwise crash on ``args.input_file.split``.
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' No Fasta file was provided'\
        ' (use --input_file).\n\n'+color.END)
        valid_arg += 1

    return valid_arg
def extract_ORF(prot_dict, codon_table, args):
    """Extend each contig's entry with its ORF and the ORF's translation.

    Every value of ``prot_dict`` is a list laid out by ``prep_translations``
    as ``[orientation, start, stop, 'aaStart..aaEnd', sequence]``.  This
    function appends two more items to each list, in this order: the
    nucleotide ORF, then its amino-acid translation (trailing ``*`` removed).

    Args:
        prot_dict: dict mapping contig name -> list described above
            (mutated in place).
        codon_table: table passed straight to ``Seq.translate(table=...)``.
        args: parsed arguments; only ``args.genetic_code`` is read (for the
            progress message).

    Returns:
        The same ``prot_dict`` object, with the two extra items per contig.

    Side effects:
        Prints progress messages and, when at least one ORF is shorter than
        expected, writes the contig names to
        ``UnexpexctedShortStuffBlameXyrus.txt`` (relative path).
    """

    print (color.BOLD+'\n\nExtracting '+color.PURPLE+'ORFs'+color.END+color.BOLD+' from'\
    ' the transcriptomic data-set\n\n'+color.END)

    for k, v in prot_dict.items():

        ## Attempting to find the most-likely START (ATG) position in the transcript (tricky)
        ## Skips this if the initial Methionine (ATG) is likely present
        ## (e.g. the alignment position of the protein = '1')
        prot_start = int(v[3].split('..')[0])
        old_start = v[1]
        if prot_start != 1:
            # Window (in nucleotides, multiples of 3) upstream of the aligned
            # start in which an alternative in-frame ATG is searched for.
            min_dist, max_dist = round_down_three(prot_start)
            min_start = old_start-min_dist
            max_start = old_start-max_dist
            # Window edges that would fall before the contig start are clamped.
            if min_start < 0:
                min_start = old_start
            if max_start < 0:
                max_start = min_start%3
            updated_start = check_new_start_new(v[-1], max_start, min_start, old_start, codon_table)
        else:
            updated_start = old_start
        # v[-1] is still the full (oriented) transcript at this point.
        temp = prot_dict[k][-1][updated_start:]

        ## Uses the given genetic code to identify the stop position of the ORF
        temp_prot = str(Seq(temp).translate(table=codon_table))
        if '*' in temp_prot:
            # Keep everything up to and including the first in-frame stop codon.
            stop_pos = (temp_prot.index('*')+1)*3
            prot_dict[k].append(temp[:stop_pos])
        else:
            # No stop found: fall back to the length of the aligned region.
            # NOTE(review): the length is measured from the ORIGINAL start
            # (v[1]) although ``temp`` begins at ``updated_start`` -- confirm
            # this is intended when the start was moved upstream.
            stop_pos = prot_dict[k][2] - prot_dict[k][1]
            prot_dict[k].append(temp[:stop_pos])

    ## Awkward_list is populated with unexpectedly SHORT ORFs!
    ## Reasons for being short include:
    #    An error Xyrus introduced
    #    Not as great genetic code decision (in-frame stop)
    #    Crummy sequence/assembly quality (false in-frame stop codons)

    awkward_list = []
    look_good = []

    for k, v in prot_dict.items():
        # After the loop above v[-2] is the transcript and v[-1] the ORF.
        expected_min = len(v[-2][v[1]:v[2]])-1
        if len(v[-1]) < expected_min:
            awkward_list.append(k)
        else:
            # Collected but never read again inside this function.
            look_good.append(k)

    if len(awkward_list) != 0:
        with open('UnexpexctedShortStuffBlameXyrus.txt','w+') as x:
            for entry in awkward_list:
                x.write(entry+'\n')
    else:
        pass

    print (color.BOLD+'\n\nTranslating '+color.PURPLE+'ORFs'+color.END+color.BOLD+' from'\
    ' using the '+color.DARKCYAN+args.genetic_code.title()+' genetic code'+color.END)

    for k, v in prot_dict.items():
        # Appends the protein sequence; a terminal stop symbol is dropped.
        prot_dict[k].append(str(Seq(v[-1]).translate(table=codon_table)).rstrip('*'))

    return prot_dict
+ if args.genetic_code.lower() == 'none' or args.genetic_code.lower() == 'condylostoma' or args.genetic_code.lower() == 'condy': + for i in inTSV: + prot_dict.setdefault(i.split('\t')[0],[]) + if int(i.split('\t')[6]) < int(i.split('\t')[7]): + ## Saves the Transcript Orientation (Coding vs. Template Strand) + prot_dict[i.split('\t')[0]].append('F') + ## Collects initial Start and Stop positions from the BLAST alignment + prot_dict[i.split('\t')[0]].append(int(i.split('\t')[6])-1) + prot_dict[i.split('\t')[0]].append(int(i.split('\t')[7])+3) + ## Implied Amino Acid alignment positions (e.g. does the alignment start at the 1st Methionine?) + prot_dict[i.split('\t')[0]].append('..'.join(i.split('\t')[-4:-2])) + + if int(i.split('\t')[7]) < int(i.split('\t')[6]): + ## Saves the Transcript Orientation (Coding vs. Template Strand) + prot_dict[i.split('\t')[0]].append('RC') + ## Collects initial Start and Stop positions from the BLAST alignment + prot_dict[i.split('\t')[0]].append(int(i.split('_Len')[1].split('_')[0])-int(i.split('\t')[6])) + prot_dict[i.split('\t')[0]].append(int(i.split('_Len')[1].split('_')[0])-int(i.split('\t')[7])+1) + ## Implied Amino Acid alignment positions (e.g. does the alignment start at the 1st Methionine?) + prot_dict[i.split('\t')[0]].append('..'.join(i.split('\t')[-4:-2])) + + ## Makes sure that the dictionary has the transcript in the correct orientation + for i in inFasta: + if i.description in prot_dict.keys(): + if 'RC' == prot_dict[i.description][0]: + prot_dict[i.description].append(str(i.seq.reverse_complement())) + else: + prot_dict[i.description].append(str(i.seq)) + + else: + for i in inTSV: + prot_dict.setdefault(i.split('\t')[0],[]) + if int(i.split('\t')[6]) < int(i.split('\t')[7]): + ## Saves the Transcript Orientation (Coding vs. 
def round_down_three(num):
    """Return two codon-aligned distances derived from an alignment position.

    Args:
        num: numeric position (the caller passes the first aligned
            amino-acid position).

    Returns:
        tuple: ``(min_val, max_val)`` where ``min_val`` is ``int(num*1.5)``
        and ``max_val`` is ``int(num*6)``, each lowered to the nearest
        multiple of three.
    """
    near = int(num*3*.5)
    far = int(num*6)
    # Subtracting the remainder snaps each value down onto a codon boundary.
    return near - near % 3, far - far % 3
def standardize_gcode(given_code):
    """Map a genetic-code name onto the table handed to ``Seq.translate``.

    Args:
        given_code: name (or alias) of the genetic code; matching is
            case-insensitive.

    Returns:
        Either an NCBI translation-table number (``6`` for the ciliate code,
        ``1`` for the universal code) or one of the custom ``CodonTable``
        objects defined at the top of this script.  Unknown names print a
        warning and fall back to the universal code (``1``).
    """
    # Only strings are normalised so that a non-string value still reaches the
    # "no valid genetic code" fallback instead of raising.
    code = given_code.lower() if isinstance(given_code, str) else given_code

    if code in ('ciliate', 'tga'):
        codon_table = 6
    elif code in ('chilodonella', 'chilo', 'taa'):
        codon_table = c_uncinata_table
    elif code in ('blepharisma', 'bleph'):
        codon_table = blepharisma_table
    elif code in ('euplotes', 'eup'):
        codon_table = euplotes_table
    elif code in ('myrionecta', 'mesodinium'):
        codon_table = myrionecta_table
    elif code in ('peritrich', 'vorticella'):
        codon_table = peritrich_table
    elif code == 'none':
        codon_table = no_stop_table
    elif code in ('condylostoma', 'condy'):
        codon_table = condylostoma_table
    elif code == 'tag':
        codon_table = tag_table
    elif code == 'universal':
        codon_table = 1
    else:
        # The flag registered with argparse is '--list_codes' (alias '-codes');
        # the previous hint ('-list_codes') is not accepted by the parser.
        print (color.BOLD+color.RED+'\n\nNo valid genetic code provided!\n\n'+color.END+\
        color.BOLD+'Using the "Universal" genetic code (by default)\n\nPlease check that the'\
        ' code you wish to use is supported:'+color.CYAN+'\n\npython 5_GCodeTranslate.py'\
        ' --list_codes\n\n'+color.END)
        codon_table = 1

    return codon_table
def update_log(filename, codon_table):
    """Append ORF counts and length statistics to the matching assembly log.

    Args:
        filename: path of the input Fasta file; the ORF files are looked up
            next to it and its third ``/``-separated component (up to
            ``_WTA``) selects the log file.
        codon_table: label of the genetic code used in the ORF file names.
            The body previously read an undefined name ``gcode`` (NameError on
            every call); the value is taken from this parameter instead.

    Side effects:
        Creates ``../PostAssembly_Logs/`` when missing and appends four lines
        to every file in it whose name starts with the contig-set prefix and
        ends with ``Log.txt``.
    """
    log_folder = '../PostAssembly_Logs/'
    os.makedirs(log_folder, exist_ok=True)

    gcode = str(codon_table)
    orf_prefix = filename.split('.fas')[0]+'_'+gcode.title()
    # NOTE(review): write_data_out() names its outputs '*_NTD.ORF.fasta' and
    # '*_AA.ORF.fasta'; confirm which naming scheme this function should read.
    ntd_lengths = [len(i.seq) for i in SeqIO.parse(orf_prefix+'_ORF.fasta','fasta')]
    aa_lengths = [len(i.seq) for i in SeqIO.parse(orf_prefix+'_ORF.aa.fasta','fasta')]

    def _length_stats(lengths):
        # Returns (average, minimum, maximum) as strings; an empty file yields
        # 'n/a' placeholders instead of a ValueError/ZeroDivisionError.
        if not lengths:
            return 'n/a', 'n/a', 'n/a'
        return '%.2f' % (sum(lengths)/float(len(lengths))), str(min(lengths)), str(max(lengths))

    avg_ntd_ORF, min_ntd_ORF, max_ntd_ORF = _length_stats(ntd_lengths)
    avg_aa_ORF, min_aa_ORF, max_aa_ORF = _length_stats(aa_lengths)

    for Logname in os.listdir(log_folder):
        # NOTE(review): index 2 assumes a fixed directory depth for `filename`.
        if Logname.startswith(filename.split('/')[2].split('_WTA')[0]) and Logname.endswith('Log.txt'):
            with open(log_folder+Logname,'a') as LogFile:
                LogFile.write('Nucleotide ORFs\t'+str(len(ntd_lengths))+'\tn/a\tn/a\n')
                LogFile.write('Nucleotide ORF Lengths\t'+avg_ntd_ORF+'\t'+min_ntd_ORF+'\t'+max_ntd_ORF+'\n')
                LogFile.write('Protein ORFs\t'+str(len(aa_lengths))+'\tn/a\tn/a\n')
                LogFile.write('Protein ORF Lengths\t'+avg_aa_ORF+'\t'+min_aa_ORF+'\t'+max_aa_ORF+'\n')
def write_data_out(prot_dict, codon_table, args):
    """Write the nucleotide and amino-acid ORF Fasta files.

    Args:
        prot_dict: dict mapping contig name -> list whose last two items are
            the nucleotide ORF (``v[-2]``) and its translation (``v[-1]``).
        codon_table: unused here; kept so existing callers keep working.
        args: parsed arguments; ``ntd_out``, ``aa_out`` and ``genetic_code``
            are read.

    Returns:
        dict: old contig name -> name written to the Fasta headers.  The
        names are currently left unchanged (identity mapping).
    """

    # Renaming contigs with the ORF length was disabled in the original code
    # (its comments tie this to script 6b), yet the new name was still built
    # for every contig and then discarded; only the mapping actually used is
    # created here.
    update_spreadsheet_dict = {k: k for k in prot_dict}

    with open(args.ntd_out,'w+') as w:
        print (color.BOLD+'\n\nWriting FASTA file with '+color.PURPLE+'ORF'+color.END+color.BOLD\
        +' sequences using the '+color.DARKCYAN+args.genetic_code.title()+' genetic code'+color.END)

        for k, v in prot_dict.items():
            w.write('>'+update_spreadsheet_dict[k]+'\n'+str(v[-2])+'\n')

    with open(args.aa_out, 'w+') as w:
        print (color.BOLD+'\n\nWriting FASTA file with '+color.PURPLE+'Translated ORF'+color.END+color.BOLD\
        +' sequences using the '+color.DARKCYAN+args.genetic_code.title()+' genetic code'+color.END)

        for k, v in prot_dict.items():
            w.write('>'+update_spreadsheet_dict[k]+'\n'+str(v[-1])+'\n')

    return update_spreadsheet_dict
True: + if os.path.isdir(args.all_output_folder + 'ToRename/') != True: + os.system('mkdir ' + args.all_output_folder + 'ToRename/') + + os.system('cp ' + args.ntd_out + ' ' + args.all_output_folder + 'ToRename/') + os.system('cp ' + args.aa_out + ' ' + args.all_output_folder + 'ToRename/') + os.system('cp ' + args.tsv_out + ' ' + args.all_output_folder + 'ToRename/') + + else: + os.system('cp ' + args.tsv_out + ' ' + args.all_output_folder) + os.system('cp ' + args.ntd_out + ' ' + args.all_output_folder) + os.system('cp ' + args.aa_out + ' ' + args.all_output_folder) + + os.system('mv ' + args.home_folder + ' ' + args.all_output_folder + 'TranslatedTranscriptomes') + + +########################################################################################### +###-------------------------------- Next Script Message --------------------------------### +########################################################################################### + +def next_script(args): + + print (color.BOLD+'\n\nLook for '+color.DARKCYAN+args.ntd_out.split('/')[-1]+color.END+\ + color.BOLD+',\n'+color.DARKCYAN+args.aa_out.split('/')[-1]+color.END+color.BOLD+', and\n'\ + +color.DARKCYAN+args.tsv_out.split('/')[-1]+color.END+color.BOLD+',\nwhich are in the '+\ + color.ORANGE+args.home_folder.split('/')[-1]+' Folder'+color.END) + + if args.no_RP == True: + print(color.BOLD+'\n\nNext Script is: '+color.GREEN+'7_FinalRename.py'+color.END+color.BOLD+\ + ' in the '+color.PURPLE+'RemovePartials Folder'+color.END+color.BOLD+'\nwith a copy of'\ + ' the outputs of this script!'+color.END) + print(color.BOLD+'\n\nRemember that you have chosen '+color.RED+'NOT '+color.END+color.BOLD+\ + 'to remove partials\nand are skipping to the renaming step!\n\n'+color.END) + + else: + print(color.BOLD+'\n\nNext Script is: '+color.GREEN+'6_FilterPartials.py'+color.END+color.BOLD+\ + ' in the '+color.PURPLE+'FinalizeTranscripts Folder'+color.END+color.BOLD+'\nwith a copy of'\ + ' the outputs of this 
def main():
    """Run the translation pipeline end to end.

    Order of operations: parse the arguments, create the working folders,
    resolve the genetic code, collect coding coordinates, extract and
    translate the ORFs, write the Fasta files, rewrite the spreadsheet,
    tidy up the folders and finally print the next-step message.
    """
    args = check_args()
    prep_folders(args)

    codon_table = standardize_gcode(args.genetic_code.lower())

    coordinates = prep_translations(args)
    translated = extract_ORF(coordinates, codon_table, args)

    # write_data_out() returns the old-name -> new-name mapping that the
    # spreadsheet update needs.
    name_map = write_data_out(translated, codon_table, args)
    update_spreadsheet(args, name_map)

    clean_up(args)
    next_script(args)
##     You either know (or have inferred) the genetic code of the organism
##  7. You have translated the sequences and checked for the data in the RemovePartials folder
##
##  E-mail Xyrus (author) for help if needed: maurerax@gmail.com
##
##  Next Script(s) to Run:
##  6b_update_cov_post_removepartials.py (then 7_FinalizeName.py)
##
##################################################################################################

from Bio import SeqIO
from Bio.Seq import Seq
from statistics import mean

import argparse, glob, os, re, shutil, subprocess, sys, time
from argparse import RawTextHelpFormatter,SUPPRESS



#------------------------------ Colors For Print Statements ------------------------------#
class color:
    """ANSI escape sequences used to colour the terminal messages."""
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    ORANGE = '\033[38;5;214m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'


#------------------------------- Main Functions of Script --------------------------------#

###########################################################################################
###---------------------------- UPDATE DIAMOND PATH BELOW! -----------------------------###
###########################################################################################
    ## IF Diamond is IN YOUR PATH then no updating is needed...

def check_diamond_path():
    """Return the path of the "diamond" executable, or exit with instructions.

    Set ``diamond_path`` below to a hard-coded location if diamond is not on
    the PATH; otherwise the PATH is searched.
    """

    diamond_path = ''
    #diamond_path = '/path/to/diamond'

    if diamond_path == '':
        # shutil.which replaces distutils.spawn.find_executable (distutils was
        # removed from the standard library in Python 3.12).
        diamond_path = shutil.which("diamond")

    if diamond_path is None:
        print (color.BOLD + '\n\nPlease open this script and check that you have included'\
        +' the PATH to the'+color.BLUE+' "diamond" '+color.END+color.BOLD+'executable.\n\n'+color.END)
        print (color.BOLD+color.BLUE+'LOOK FOR:\n\n'+color.RED\
        +'#------------------------------ UPDATE DIAMOND PATH BELOW! -------------------------------#'\
        +color.BLUE+'\n\nThis is somewhere around lines 50 - 80...\n\n'+color.END)

        sys.exit()

    return diamond_path

###########################################################################################
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
###########################################################################################

def check_args():
    """Parse the command line and locate the input files.

    Returns the argparse namespace extended with ``id_print`` (identity as a
    whole-number percentage string), ``all_output_folder`` (folder holding the
    inputs, with trailing '/'), ``file_prefix`` (basename only) and the three
    lists ``file_listNTD`` / ``file_listAA`` / ``file_listTSV``.
    Exits when no arguments are given or when ``return_more_info`` reports a
    problem.
    """

    parser = argparse.ArgumentParser(description=
    color.BOLD + '\n\nThis script is intended to '+color.RED+'Identify and Collapse '+color.END\
    +color.BOLD+'partial '+color.PURPLE+'ORFS\n'+color.END+color.BOLD+'present within a '\
    +color.RED+'Given'+color.END+color.BOLD+' transcriptome (or replicate) transcriptome(s)'\
    +usage_msg(), usage=SUPPRESS, formatter_class=RawTextHelpFormatter)

    required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

    required_arg_group.add_argument('--file_prefix','-fp', action='store',
    help=color.BOLD+color.GREEN+' File prefix that is unique (or common)\n to the files '\
    'to be processed\n'+color.END)


    optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)

    optional_arg_group.add_argument('--identity','-id', type=float, action='store', default=0.98,
    help=color.BOLD+color.GREEN+' Identity threshold for identifying \n "partials" to larger'\
    ' contigs\n (default = 0.98)\n'+color.END)
    optional_arg_group.add_argument('-author', action='store_true',
    help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)
    optional_arg_group.add_argument('--hook_fasta','-f', help='Path to the fasta file of the Hook DB in the Databases/db_OG folder')

    if len(sys.argv[1:]) == 0:
        print (parser.description)
        print ('\n')
        sys.exit()

    args = parser.parse_args()

    # round() rather than int(): float noise (0.29*100 == 28.999...) must not
    # shift the percentage used in messages and in the diamond output name.
    args.id_print = str(int(round(float(args.identity)*100)))

    if args.file_prefix is None:
        # Nothing to search for: leave the lists empty so that
        # return_more_info reports the problem instead of crashing here.
        args.all_output_folder = './'
        args.file_prefix = ''
        args.file_listNTD, args.file_listAA, args.file_listTSV = [], [], []
    else:
        folder, sep, prefix = args.file_prefix.rpartition('/')
        # A prefix without any '/' refers to the current folder, not to '/'.
        args.all_output_folder = folder + '/' if sep else './'
        args.file_prefix = prefix

        candidates = [i for i in os.listdir(args.all_output_folder) if args.file_prefix in i]

        args.file_listNTD = [args.all_output_folder + i for i in candidates if i.endswith('NTD.ORF.fasta')]

        args.file_listAA = [args.all_output_folder + i for i in candidates if i.endswith('AA.ORF.fasta')]

        args.file_listTSV = [args.all_output_folder + i for i in candidates if i.endswith('results.tsv')]

    quit_eval = return_more_info(args)
    if quit_eval > 0:
        print ('\n')
        sys.exit()

    return args


###########################################################################################
###------------------------------- Script Usage Message --------------------------------###
###########################################################################################

def usage_msg():
    """Return the coloured example command line appended to the help text."""
    return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 6_FilterPartials.py'\
    ' --file_prefix Op_me_Xxma'+color.END)


##########################################################################################
###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
##########################################################################################

def _no_files_msg(kind):
    """Build the "No <kind> found!" message for a missing class of input files."""
    return (color.BOLD+'\n\nNo '+color.ORANGE+kind+color.END+color.BOLD+\
    ' found!\n\nCheck that your'+color.GREEN+' File Prefix'+color.END+color.BOLD+\
    ' is present in\nthe files of interest'+color.END)


def return_more_info(args):
    """Print the messages for flagged options / bad inputs.

    Returns the number of conditions that should stop the script (0 means
    "carry on").
    """

    valid_arg = 0

    author = (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'\
    ' maurerax@gmail.com\n\n'+color.END)

    if args.author == True:
        print (author)
        valid_arg += 1

    if args.file_listNTD == []:
        print (_no_files_msg('Nucleotide Fasta Files'))
        valid_arg += 1

    if args.file_listAA == []:
        print (_no_files_msg('Protein Fasta Files'))
        valid_arg += 1

    if args.file_listTSV == []:
        print (_no_files_msg('OG-Assignment Spreadsheets'))
        valid_arg += 1

    if not (len(args.file_listNTD) == len(args.file_listAA) == len(args.file_listTSV)):
        print (color.BOLD+color.RED+'\n\nError:'+color.END+color.BOLD+' Unequal numbers of'\
        ' input files found.\n\nDouble-check that there are:'+color.CYAN+'SINGLE'+color.END\
        +color.BOLD+' Nucleotide and Protein fasta files and OG-assignment Spreadsheet for'\
        ' each transcriptome\n\nThen try once again.'+color.END)
        valid_arg += 1

    # Both filtering routines parse the Hook DB, so fail early (before diamond
    # is run) rather than with a traceback afterwards.
    hook_fasta = getattr(args, 'hook_fasta', None)
    if hook_fasta is None or not os.path.isfile(hook_fasta):
        print (color.BOLD+color.RED+'\n\nError:'+color.END+color.BOLD+' The Hook DB fasta file'\
        ' (--hook_fasta) was not provided or does not exist.'+color.END)
        valid_arg += 1

    return valid_arg


##########################################################################################
###------------------------- Creates Folders For Storing Data -------------------------###
##########################################################################################

def prep_folders(args):
    """Create the ToRename, Original and Processed folder trees if missing."""

    taxon_folder = args.all_output_folder + args.file_prefix

    # makedirs creates the intermediate folders too and copes with paths that
    # contain spaces, which the previous shell 'mkdir' calls did not.
    for folder in (args.all_output_folder + 'ToRename',
                   taxon_folder + '/Original/SpreadSheets',
                   taxon_folder + '/Original/Concatenated/SpreadSheets',
                   taxon_folder + '/Processed/SpreadSheets'):
        os.makedirs(folder, exist_ok=True)


##########################################################################################
###-------------------- Merges Fasta Files When Replicates Present --------------------###
##########################################################################################

def merge_fasta_replicates(args, type):
    """Concatenate the replicate fasta files of one type ('NTD', otherwise AA).

    Every record name is prefixed with '<index>_', where index is the position
    of its file in the corresponding file list.
    """

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/'

    fasta_list = args.file_listNTD if type == 'NTD' else args.file_listAA

    fasta_to_merge = []
    for count, fasta_file in enumerate(fasta_list):
        with open(fasta_file) as handle:
            records = handle.read().split('>')
        for record in records:
            if record.strip() == '':
                continue
            # A file without a final newline must not run into the next record.
            if not record.endswith('\n'):
                record += '\n'
            fasta_to_merge.append('>'+str(count)+'_'+record)

    with open(cat_folder+args.file_prefix+'.'+type+'.Concatenated.fasta','w+') as w:
        w.write(''.join(fasta_to_merge))


##########################################################################################
###--------------------- Merges TSV Files When Replicates Present ---------------------###
##########################################################################################

def merge_tsv_replicates(args):
    """Concatenate the replicate spreadsheets, prefixing each row with '<index>_'."""

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/SpreadSheets/'

    tsv_to_merge = []
    for count, tsv_file in enumerate(args.file_listTSV):
        with open(tsv_file) as handle:
            tsv_to_merge += [str(count)+'_'+row for row in handle.read().split('\n') if row != '']

    with open(cat_folder+args.file_prefix+'_Concatenated.allOGCleanresults.tsv','w+') as w:
        w.write(''.join(row+'\n' for row in tsv_to_merge))


##########################################################################################
###------------------ Calls on the other Merge Functions by Data Type -----------------###
##########################################################################################

def merge_relevant_data(args):
    """Merge the nucleotide fasta, protein fasta and spreadsheet replicates."""

    print (color.BOLD+'\n\nMerging Transcriptome data together.'+color.END)

    merge_fasta_replicates(args, 'NTD')
    merge_fasta_replicates(args, 'AA')
    merge_tsv_replicates(args)


##########################################################################################
###------------------- Uses Diamond to perform Self-vs-Self "BLAST" -------------------###
##########################################################################################

def self_blast(args, diamond_path):
    """Run diamond makedb + blastp of the merged protein fasta against itself.

    Returns the path of the tabular (outfmt 6) result file.  Exits if either
    diamond command reports a failure.
    """

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/'

    aa_fasta = cat_folder + args.file_prefix + '.AA.Concatenated.fasta'
    db_prefix = cat_folder + args.file_prefix + '.AA.Concatenated'
    out_tsv = cat_folder + 'SpreadSheets/' + args.file_prefix + '.Concatenated.Self.'+str(args.id_print)+'ID.tsv'

    diamond_makedb = [diamond_path, 'makedb', '--in', aa_fasta, '-d', db_prefix]

    # NOTE(review): the DIAMOND manual describes --id and --query-cover as
    # percentages; the values below (e.g. 0.98 and 0.7) are passed through
    # unchanged from the original script -- confirm they act as intended.
    diamond_self = [diamond_path, 'blastp', '-q', aa_fasta, '-d', db_prefix,
                    '--strand', 'plus', '--no-self-hits', '--id', str(args.identity),
                    '--query-cover', '0.7', '--evalue', '1e-15', '--threads', '60',
                    '--outfmt', '6', '-o', out_tsv]

    print (color.BOLD+'\n\nBinning ALL '+color.ORANGE+'Nucleotide ORFs'+color.END+color.BOLD\
    +' for '+color.GREEN+args.file_prefix+color.END+color.BOLD+' at '+args.id_print\
    +'% identity.\n\n'+color.END)

    # Argument lists (no shell) keep paths containing spaces intact.
    for command in (diamond_makedb, diamond_self):
        if subprocess.call(command) != 0:
            print (color.BOLD+color.RED+'\n\nError:'+color.END+color.BOLD+' diamond failed while running: '\
            +' '.join(command)+'\n\n'+color.END)
            sys.exit(1)

    return out_tsv


##########################################################################################
###--------------- Checks Whether the Self-vs-Self Search Found Any Hits --------------###
##########################################################################################

def check_Self_vs_Self(tsv_file):
    """Return 'empty' if the self-vs-self table has no rows, else 'continue'.

    An empty table is overwritten with a short explanatory message.
    """

    with open(tsv_file) as handle:
        has_hits = any(row.rstrip('\n') != '' for row in handle)

    if has_hits:
        return 'continue'

    with open(tsv_file,'w+') as w:
        w.write('No Self-vs-Self hits were found')

    return 'empty'


##########################################################################################
###------------------------- Helpers Shared by the Filter Steps -----------------------###
##########################################################################################

def _og_from_name(name):
    """Return the 10-character gene-family tag (e.g. 'OG6_' + 6 characters) found in name.

    The 6 characters following the LAST 'OG?_' in ``name`` are taken as the OG
    number; the 4 characters preceding their first occurrence are the prefix.
    """
    og_number = re.split('OG.{1}_', name)[-1][:6]
    og_prefix = name.split(og_number)[0][-4:]
    return og_prefix + og_number


def _load_hook_og_lengths(hook_fasta):
    """Return {OG: mean sequence length} for the Hook DB fasta.

    The OG of a record is the last 10 characters of its id.
    """
    lengths = {}
    for rec in SeqIO.parse(hook_fasta, 'fasta'):
        lengths.setdefault(rec.id[-10:], []).append(len(rec.seq))
    return {og: mean(values) for og, values in lengths.items()}


def _move_into(source, folder):
    """Move source into folder (replacing a same-named file); skip if source is absent."""
    if os.path.exists(source):
        shutil.move(source, os.path.join(folder, os.path.basename(source)))


##########################################################################################
###-------------------- Removes Nearly Identical ORFs from Data Set -------------------###
##########################################################################################

def filter_NTD_data(args):
    """Collapse partial ORFs onto their longest partner and drop length outliers.

    Steps: (1) hit subjects whose nucleotide length is > 4.5x or < 1.5x the mean
    Hook DB length of their OG are discarded; (2) walking the remaining hits
    from the longest subject down, each query is collapsed onto its subject;
    (3) survivors are renamed (replicate index removed, '_Trans<N>' appended,
    N = number of replicates represented); (4) ORFs outside 70%-200% of the
    mean length of their OG within this taxon are removed.

    Writes <prefix>_SeqPairsAbove98.txt and the Processed/*_Filtered.Final
    fasta files.  Returns the list of kept headers (each starting with '>').
    """

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/'
    proc_folder = args.all_output_folder + args.file_prefix + '/Processed/'

    nuc_Above98_hit = {}
    nuc_tsv_100 = 0

    OGLenDB = _load_hook_og_lengths(args.hook_fasta)

    print (color.BOLD+'\n\nRemoving Partial '+color.PURPLE+'ORFs'+color.END+color.BOLD+\
    ' with >'+args.id_print+'% Nucleotide Identity over >70% of\ntheir length when '\
    'compared to more complete '+color.PURPLE+'ORFs '+color.END+color.BOLD+'from: '\
    +color.CYAN+args.file_prefix+'\n\n'+color.END)

    #####################################################################
    ## Self-v-self BLAST Output Parsing - first checks for Seq-length! ##
    #####################################################################

    with open(cat_folder+'SpreadSheets/'+args.file_prefix+'.Concatenated.Self.'+str(args.id_print)+'ID.tsv') as tsv:
        nuc_tsv_raw = [row.rstrip('\n') for row in tsv if row != '\n']

    # The last 'OG?_' and the last 'Len' of a row are taken, as before; the
    # sequence discarded is the subject (second column).
    length_outliers = set()
    for line in nuc_tsv_raw:
        og = _og_from_name(line)
        if og in OGLenDB:
            subject_length = int(line.split('Len')[-1].split('_')[0])
            if subject_length > 4.5*OGLenDB[og] or subject_length < 1.5*OGLenDB[og]:
                length_outliers.add(line.split('\t')[1])

    nuc_tsv = [line for line in nuc_tsv_raw if line.split('\t')[1] not in length_outliers]

    if nuc_tsv:
        def _hit_len(row):
            return int(row.split('\t')[1].split('Len')[-1].split('_')[0])

        if 'Cov' in nuc_tsv[0].split('\t')[0].split('_')[-3]:
            nuc_tsv.sort(key=lambda x: (-_hit_len(x), -int(x.split('\t')[1].split('Cov')[-1].split('_')[0])))
        else:
            nuc_tsv.sort(key=lambda x: -_hit_len(x))

    # A set gives constant-time membership tests; the outcome is identical to
    # the former list because only membership is ever checked.
    seqs_to_toss = set(length_outliers)
    for line in nuc_tsv:
        fields = line.split('\t')
        query, subject = fields[0], fields[1]
        if subject not in seqs_to_toss:
            nuc_Above98_hit.setdefault(subject,[]).append(query)
            seqs_to_toss.add(query)
            # Counted per collapsed partial, matching the summary wording below.
            if fields[2] == '100.0':
                nuc_tsv_100 += 1

    inFasta_NTD_rawLen = [i for i in SeqIO.parse(cat_folder+args.file_prefix+'.NTD.Concatenated.fasta', 'fasta') if i.description]
    inFasta_NTD = [i for i in inFasta_NTD_rawLen if i.description not in seqs_to_toss]
    inFasta_AA = [i for i in SeqIO.parse(cat_folder+args.file_prefix+'.AA.Concatenated.fasta','fasta') if i.description not in seqs_to_toss]

    def _final_name(description):
        # Drop the replicate index and record how many replicates the ORF
        # (plus the partials collapsed onto it) was seen in.
        partners = nuc_Above98_hit.get(description)
        if partners is None:
            rep_num = '1'
        else:
            rep_num = str(len(set([description.split('_')[0]]+[j.split('_')[0] for j in partners])))
        return '>'+'_'.join(description.split('_')[1:])+'_Trans'+rep_num

    prepped_NTD = [_final_name(i.description)+'\n'+str(i.seq) for i in inFasta_NTD]
    prepped_AA = [_final_name(i.description)+'\n'+str(i.seq).replace('*','X') for i in inFasta_AA]

    with open(args.all_output_folder + args.file_prefix + '/'+args.file_prefix+'_SeqPairsAbove98.txt','w+') as w:
        for k, v in nuc_Above98_hit.items():
            w.write(k+'\t'+'\t'.join(v)+'\n')

    ###################################################################################
    ## Check for abnormally short sequences for the taxon for every Gene Family (OG) ##
    ###################################################################################

    print (color.BOLD+'Removing Abnormally Short (70% length) OR Long (200% length)'\
    +color.PURPLE+' ORFs'+color.END+color.BOLD+'\ncompared to typical '+color.ORANGE+'Gene '\
    'Family '+color.END+color.BOLD+'member length for: '+color.CYAN+args.file_prefix+'\n\n'+color.END)

    self_OGLenDB = {}
    for entry in prepped_NTD:
        self_OGLenDB.setdefault(_og_from_name(entry.split('\n')[0]),[]).append(len(entry.split('\n')[-1]))

    good_NTD_names = []
    for entry in prepped_NTD:
        og_lengths = self_OGLenDB[_og_from_name(entry.split('\n')[0])]
        if (0.7*sum(og_lengths)/float(len(og_lengths))) <= len(entry.split('\n')[-1]) <= (2*sum(og_lengths)/float(len(og_lengths))):
            good_NTD_names.append(entry.split('\n')[0])

    good_name_set = set(good_NTD_names)
    good_NTD_seqs = [i for i in prepped_NTD if i.split('\n')[0] in good_name_set]
    good_AA_seqs = [i for i in prepped_AA if i.split('\n')[0] in good_name_set]

    # Summary counts are kept in their own variables so that they are not
    # zeroed before being reported.
    partial_count = len(seqs_to_toss - length_outliers)
    odd_length_count = (len(prepped_NTD) - len(good_NTD_names)) + len(length_outliers)

    ####################################################################
    ## Finalized Outputs are Summarized and Written Out to New Fastas ##
    ####################################################################

    print (color.BOLD+'There were '+color.CYAN+str(len(inFasta_NTD_rawLen))+color.END+color.BOLD\
    +color.PURPLE+' ORFs '+color.END+color.BOLD+'originally, with '+color.ORANGE+\
    str(nuc_tsv_100)+color.END+color.BOLD+' Partial '+color.PURPLE+'ORFs'+color.END+\
    color.BOLD+' that\nwere '+color.RED+'100% Identical'+color.END+color.BOLD+' to larger'\
    +color.PURPLE+' ORFs.\n\n'+color.END)

    print(color.BOLD+'Of the '+color.CYAN+str(len(inFasta_NTD_rawLen))+color.END+color.BOLD\
    +' original'+color.PURPLE+' ORFs'+color.END+color.BOLD+', '+color.ORANGE+str(partial_count)+\
    color.END+color.BOLD+' are '+color.PURPLE+'Partial ORFs '+color.END+color.BOLD+'(e.g. '+\
    color.RED+'> '+args.id_print+'%'+color.END+color.BOLD+'\nNUCLEOTIDE identity) to larger'\
    +color.PURPLE+' ORFs'+color.END+color.BOLD+' with '+color.ORANGE+str(odd_length_count)\
    +color.END+color.BOLD+' additional'+color.PURPLE+' ORFs\n'+color.END+color.BOLD+'that were either '+\
    color.RED+'TOO LONG or SHORT.\n\n'+color.END)

    print (color.BOLD+'Overall, there are '+color.GREEN+str(len(good_NTD_seqs))+' Unique ORFs'\
    +color.END+color.BOLD+' for '+color.CYAN+args.file_prefix+'\n'+color.END)

    with open(proc_folder+args.file_prefix+'_Filtered.Final.NTD.ORF.fasta','w+') as w:
        for i in good_NTD_seqs:
            w.write(i+'\n')
    with open(proc_folder+args.file_prefix+'_Filtered.Final.AA.ORF.fasta','w+') as x:
        for i in good_AA_seqs:
            x.write(i+'\n')

    return good_NTD_names


##########################################################################################
###------------------- Updates SpreadSheet with Update Sequence Names -----------------###
##########################################################################################

def update_tsv(args, NTD_list_names):
    """Write the spreadsheet rows of the kept ORFs under their final names.

    NTD_list_names holds headers such as '>name_Trans2'; the row is looked up
    by the name without '>' and without the '_Trans...' suffix.  Raises
    KeyError if a kept ORF has no row in the merged spreadsheet.
    """

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/SpreadSheets/'
    proc_folder = args.all_output_folder + args.file_prefix + '/Processed/'

    inTSV = {}
    with open(cat_folder+args.file_prefix+'_Concatenated.allOGCleanresults.tsv') as handle:
        for row in handle:
            # Strip the line ending here; a newline is added back on writing,
            # so the output no longer contains blank lines.
            row = row.rstrip('\n')
            if row == '':
                continue
            fields = row.split('\t')
            inTSV['_'.join(fields[0].split('_')[1:])] = '\t'.join(fields[1:])

    Updated_inTSV = [i.strip('>')+'\t'+inTSV[i.split('_Trans')[0].strip('>')] for i in NTD_list_names]

    with open(proc_folder+'/SpreadSheets/'+args.file_prefix+'_Filtered.Final.allOGCleanresults.tsv','w+') as w:
        for line in Updated_inTSV:
            w.write(line+'\n')


def no_partials_present(args, OGLenDB=None):
    """Length-filter the merged data when the self-vs-self search found no hits.

    OGLenDB may be a ready-made {OG: mean Hook DB length} mapping; when it is
    None the mapping is loaded from args.hook_fasta.  ORFs at least 4.5x the
    Hook DB mean of their OG are dropped, then ORFs outside 70%-200% of the
    mean length of their OG within this taxon.

    For consistency with filter_NTD_data the outputs carry names without the
    replicate index and with '_Trans1' appended, '*' is written as 'X' in the
    protein fasta, and an empty <prefix>_SeqPairsAbove98.txt is created.
    """

    print (color.BOLD+color.RED+'\n\nWarning:'+color.END+color.BOLD+' No partial sequences'\
    ' were found with > '+str(args.id_print)+'% nucleotide identity.\n\nThe data will still be '\
    'checked for ORFs that are unexpectedly '+color.ORANGE+'Short'+color.END+color.BOLD+' or'\
    +color.ORANGE+' Long.\n\n'+color.END)

    cat_folder = args.all_output_folder + args.file_prefix + '/Original/Concatenated/'
    proc_folder = args.all_output_folder + args.file_prefix + '/Processed/'

    NTD_file = cat_folder+args.file_prefix+'.NTD.Concatenated.fasta'
    AA_file = cat_folder+args.file_prefix+'.AA.Concatenated.fasta'
    TSV_file = cat_folder+'SpreadSheets/'+args.file_prefix+'_Concatenated.allOGCleanresults.tsv'

    if OGLenDB is None:
        OGLenDB = _load_hook_og_lengths(args.hook_fasta)

    inFasta = {i.description:str(i.seq) for i in SeqIO.parse(NTD_file,'fasta')}

    # ORFs whose OG is absent from the Hook DB cannot be judged and are kept.
    for name in [k for k, v in inFasta.items()
                 if _og_from_name(k) in OGLenDB and len(v) >= 4.5*OGLenDB[_og_from_name(k)]]:
        del inFasta[name]

    print (color.BOLD+'Removing Abnormally Short (70% length) OR Long (200% length)'\
    +color.PURPLE+' ORFs'+color.END+color.BOLD+'\ncompared to typical '+color.ORANGE+'Gene '\
    'Family '+color.END+color.BOLD+'member length for: '+color.CYAN+args.file_prefix+'\n\n'+color.END)

    self_OGLenDB = {}
    for k, v in inFasta.items():
        self_OGLenDB.setdefault(_og_from_name(k),[]).append(len(v))

    self_OGLenDB_Final = {k:sum(v)/len(v) for k, v in self_OGLenDB.items()}

    good_NTD_data = {k:v for k, v in inFasta.items()
                     if 0.7*self_OGLenDB_Final[_og_from_name(k)] <= len(v) <= 2*self_OGLenDB_Final[_og_from_name(k)]}

    good_AA_data = {i.description:str(i.seq).replace('*','X') for i in SeqIO.parse(AA_file,'fasta') if i.description in good_NTD_data}

    with open(TSV_file) as handle:
        good_TSV_data = [row for row in handle.read().split('\n') if row != '' and row.split('\t')[0] in good_NTD_data]

    def _final_name(description):
        return '_'.join(description.split('_')[1:])+'_Trans1'

    with open(proc_folder+args.file_prefix+'_Filtered.Final.NTD.ORF.fasta','w+') as w:
        for k, v in good_NTD_data.items():
            w.write('>'+_final_name(k)+'\n'+v+'\n')

    with open(proc_folder+args.file_prefix+'_Filtered.Final.AA.ORF.fasta','w+') as x:
        for k, v in good_AA_data.items():
            x.write('>'+_final_name(k)+'\n'+v+'\n')

    with open(proc_folder+'SpreadSheets/'+args.file_prefix+'_Filtered.Final.allOGCleanresults.tsv','w+') as y:
        for row in good_TSV_data:
            y.write(_final_name(row.split('\t')[0])+'\t'+'\t'.join(row.split('\t')[1:])+'\n')

    # 6b_update_cov_post_removepartials.py reads this file; nothing was
    # collapsed, so it is written empty.
    with open(args.all_output_folder + args.file_prefix + '/'+args.file_prefix+'_SeqPairsAbove98.txt','w+'):
        pass


##########################################################################################
###--------------------- Cleans up the Folder and Moves Final Files -------------------###
##########################################################################################

def clean_up(args):
    """Move the input fasta files and spreadsheets into <prefix>/Original/."""

    original_folder = args.all_output_folder + args.file_prefix + '/Original/'

    for ntd_file in args.file_listNTD:
        _move_into(ntd_file, original_folder)
        _move_into(ntd_file.replace('NTD.ORF.fasta','AA.ORF.fasta'), original_folder)
        # Same pattern as the former shell glob; glob.escape protects any
        # wildcard characters in the folder or file name itself.
        for tsv_file in glob.glob(glob.escape(ntd_file.split('named')[0])+'named*allOGCleanresults.tsv'):
            _move_into(tsv_file, original_folder + 'SpreadSheets/')


###########################################################################################
###-------------------------------- Next Script Message --------------------------------###
###########################################################################################

def next_script():
    """Tell the user which script to run next."""

    print(color.BOLD+'\nNext Script is: '+color.GREEN+'6b_update_cov_post_removepartials.py\n\n'+color.END)


##########################################################################################
###------------------- Checks Command Line Arguments and Calls Steps ------------------###
##########################################################################################

def main():
    """Run the partial-ORF filtering steps in order."""

    # Arguments are parsed first so that --help works without diamond installed.
    args = check_args()

    diamond_path = check_diamond_path()

    prep_folders(args)

    merge_relevant_data(args)

    self_BLAST_out = self_blast(args, diamond_path)

    evaluation = check_Self_vs_Self(self_BLAST_out)

    if evaluation != 'empty':
        NTD_names = filter_NTD_data(args)
        update_tsv(args, NTD_names)
    else:
        # The second parameter is mandatory in the function's signature; it
        # was previously omitted here, which raised a TypeError.
        no_partials_present(args, None)

    clean_up(args)

    next_script()

if __name__ == '__main__':
    main()

# ------------------------------------------------------------------------------------------
# Patch boundary -- next file (new file mode 100644, index 0000000..6d248b1):
#   PTL1/Transcriptomes/Scripts/6b_update_cov_post_removepartials.py
# ------------------------------------------------------------------------------------------
#!/usr/bin/python

__author__ = "Jean-David Grattepanche"
__version__ = "2, August 28, 2017"
__email__ = "jeandavid.grattepanche@gmail.com"


import sys
import os
import re
import time
import string
import glob
import shutil
import os.path
from Bio import SeqIO
from sys import argv

def Addcoverage(code):
    """Recompute the coverage of ORFs that absorbed partial ORFs.

    code is the path of the <file_prefix> folder produced by
    6_FilterPartials.py.  For every line of <prefix>_SeqPairsAbove98.txt the
    sum of coverage x length over all listed transcripts is stored for the
    first (kept) transcript; the files in Processed/ are then copied to
    Updated_Coverage/ (with 'UC.' inserted before 'Final') with the 'Cov'
    field of those ORFs replaced by round(sum / length of the ORF) and the
    '_Trans...' suffix removed.  Finally the updated files are copied to the
    ToRename folder next to the <file_prefix> folder.
    """
    # A trailing '/' would otherwise leave an empty prefix.
    seqfolder = code.rstrip('/') or code
    # '.' (not the filesystem root) when the folder is given without a parent.
    all_output_folder = os.path.dirname(seqfolder) or '.'
    code = seqfolder.split('/')[-1]

    covupd = {}
    pairs_file = seqfolder + '/' + code + '_SeqPairsAbove98.txt'
    # No pairs file means nothing was collapsed: copy everything unchanged.
    if os.path.isfile(pairs_file):
        with open(pairs_file,'r') as pairs:
            for seqcoll in pairs:
                CL = 0
                for transc in seqcoll.split('\t'):
                    if CL == 0:
                        reftrans = ('_').join(transc.split('_')[1:])
                    coverage = int(transc.split('Cov')[1].split('_')[0])
                    Length = int(transc.split('Len')[1].split('_')[0])
                    CL += coverage * Length
                covupd[reftrans] = CL

    for folder in (seqfolder + '/Updated_Coverage/', seqfolder + '/Updated_Coverage/SpreadSheets/', all_output_folder + '/ToRename'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    for spreadsh in os.listdir(seqfolder + '/Processed/SpreadSheets/'):
        if spreadsh.endswith('.tsv'):
            out_name = seqfolder + '/Updated_Coverage/SpreadSheets/' + spreadsh.split('Final')[0] + 'UC.Final' + spreadsh.split('Final')[1]
            with open(seqfolder + '/Processed/SpreadSheets/'+ spreadsh, 'r') as intsv, open(out_name,'w+') as outtsvtokeep:
                for row in intsv:
                    if row.split('_Trans')[0] in covupd:
                        og_number = re.split('OG.{1}_', row)[-1][:6]
                        og_prefix = row.split(og_number)[0][-4:]

                        newcov2 = round(covupd[row.split('_Trans')[0]] / int(row.split('_Len')[1].split('_')[0]))
                        outtsvtokeep.write(row.split('Cov')[0]+'Cov'+str(newcov2)+'_' + og_prefix +row.split(og_prefix)[1].split('_Trans')[0] +'\t' +('\t').join(row.split('\t')[1:]))
                    elif 'Trans' in row:
                        outtsvtokeep.write(row.split('_Trans')[0]+ '\t' +('\t').join(row.split('\t')[1:]))
                    else:
                        outtsvtokeep.write(row)

    for seqfile in os.listdir(seqfolder + '/Processed'):
        if seqfile.endswith('.fasta'):
            out_name = seqfolder + '/Updated_Coverage/' + seqfile.split('Final')[0] + 'UC.Final' + seqfile.split('Final')[1]
            with open(out_name,'w+') as outseqtokeep:
                for Seq in SeqIO.parse(seqfolder + '/Processed/' + seqfile ,'fasta'):
                    if Seq.description.split('_Trans')[0] not in covupd:
                        outseqtokeep.write('>'+Seq.description.split('_Trans')[0]+ '\n'+str(Seq.seq) +'\n')
                    else:
                        og_number = re.split('OG.{1}_', Seq.description)[-1][:6]
                        og_prefix = Seq.description.split(og_number)[0][-4:]
                        og = og_prefix + og_number

                        newcov = round(covupd[Seq.description.split('_Trans')[0]] / int(Seq.description.split('_Len')[1].split('_')[0]))
                        outseqtokeep.write('>'+Seq.description.split('Cov')[0]+'Cov'+str(newcov)+'_' + Seq.description.split(og)[0][-2:] + og + '\n'+str(Seq.seq) +'\n')

    # Same files as the former "cp ...*fasta" / "cp ...*tsv" shell calls,
    # without depending on a shell or breaking on spaces in the path.
    for updated in glob.glob(seqfolder + '/Updated_Coverage/*fasta') + glob.glob(seqfolder + '/Updated_Coverage/SpreadSheets/*tsv'):
        shutil.copy(updated, all_output_folder + '/ToRename/')


def main():
    """Entry point: expects the <file_prefix> folder as the only argument."""
    if len(argv) != 2:
        print('Usage: python 6b_update_cov_post_removepartials.py path/to/file_prefix_folder')
        sys.exit(1)
    Addcoverage(argv[1])

if __name__ == '__main__':
    main()

# ------------------------------------------------------------------------------------------
# Patch boundary -- next file (new file mode 100644, index 0000000..677a836):
#   PTL1/Transcriptomes/Scripts/7_FinalizeName.py
# ------------------------------------------------------------------------------------------
#!/usr/bin/env python3.5

##__Updated__: 31_08_2017
##__Author__: Xyrus Maurer-Alcala; maurerax@gmail.com
##__Usage__: python 7_FinalizeName.py --help

##################################################################################################
## This script is intended to rename the outputs of the FilterPartials script
## to a given 10-character code that is used in the Katz lab Phylogenomic Tree building methods
##
## Prior to running this script, ensure the following:
##
## 1. You have assembled your transcriptome and COPIED the 'assembly' file
##    (contigs.fasta, or scaffolds.fasta) to the PostAssembly Folder
## 2. Removed small sequences (usually sequences < 300bp) with ContigFilterPlusStats.py
## 3. Removed SSU/LSU sequences from your Fasta File
## 4. Classified your sequences as Strongly Prokaryotic/Eukaryotic or Undetermined
## 5. Classified the Non-Strongly Prokaryotic sequences into OGs
## 6. You either know (or have inferred) the genetic code of the organism
## 7. You have translated the sequences and checked for the data in the RemovePartials folder
## 8.
##     Partial sequences have been removed from the transcriptomic data sets                    ##
##                                                                                              ##
##                                   COMMAND Example Below                                      ##
##                               Extra Notes at Bottom of Script                                ##
##                                                                                              ##
##             E-mail Xyrus (author) for help if needed: maurerax@gmail.com                     ##
##                                                                                              ##
##                                  Next Script(s) to Run:                                      ##
##                                NONE! You're FINISHED! :D                                     ##
##                                                                                              ##
##################################################################################################

import argparse
import os
import sys
from argparse import RawTextHelpFormatter, SUPPRESS

#----------------------- Solely to Make Print Statements Colorful -----------------------#

class color:
    # ANSI escape sequences used to colour the terminal output of this script.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    ORANGE = '\033[38;5;214m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Resets every attribute set by the codes above.
    END = '\033[0m'


#------------------------------- Main Functions of Script --------------------------------#

###########################################################################################
###--------------------- Parses and Checks Command-Line Arguments ----------------------###
###########################################################################################

def check_args():
    """Parse the command line and return the namespace used by the rest of the script.

    With no arguments at all, the description (which embeds the usage example)
    is printed and the script exits.  When ``--input_file`` or ``--name`` is
    missing (and ``-author`` was not requested) an error is printed and the
    script exits with status 1 instead of failing later on a ``None`` value.

    On success the returned namespace additionally carries:
    ``all_output_folder`` (two path components above the input file),
    ``out_XML`` (name of the fake BLAST output file), ``file_prefix`` and the
    four ``r2g_*`` ReadyToGo destination folders.  ``input_AA``, ``input_NTD``
    and ``input_TSV`` are filled in by ``return_more_info``.
    """

    parser = argparse.ArgumentParser(description=
        color.BOLD + '\n\nThis script is intended to '+color.RED+'Rename '+color.END\
        +color.BOLD+'the core set of '+color.PURPLE+'ORFS\n'+color.END+color.BOLD+'with a valid '\
        +color.RED+'10-character code'+color.END+color.BOLD+' for use in the KatzLab\nPhylogenomic Pipeline'\
        +usage_msg(), usage=SUPPRESS, formatter_class=RawTextHelpFormatter)

    required_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Required Options'+color.END)

    required_arg_group.add_argument('--input_file','-in', action='store',
        help=color.BOLD+color.GREEN+' One of the Fasta files that is to be renamed\n'+color.END)
    required_arg_group.add_argument('--name','-n', action='store',
        help=color.BOLD+color.GREEN+' A valid 10-Character code for updating the data\n'+color.END)

    optional_arg_group = parser.add_argument_group(color.ORANGE+color.BOLD+'Options'+color.END)

    optional_arg_group.add_argument('-author', action='store_true',
        help=color.BOLD+color.GREEN+' Prints author contact information\n'+color.END)

    if not sys.argv[1:]:
        print (parser.description)
        print ('\n')
        sys.exit()

    args = parser.parse_args()

    # The two "required" options are not enforced by argparse (so that -author
    # can be used on its own); enforce them here before anything dereferences
    # them.  The -author case is left to return_more_info, which prints the
    # contact details.
    missing = [flag for flag, value in (('--input_file', args.input_file),
                                        ('--name', args.name)) if not value]
    if missing and not args.author:
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Missing required '
            'option(s): '+color.DARKCYAN+', '.join(missing)+color.END+'\n')
        sys.exit(1)

    quit_eval = return_more_info(args)
    if quit_eval > 0:
        print ('\n')
        sys.exit()

    args.all_output_folder = '/'.join(args.input_file.split('/')[:-2])

    # The spreadsheet name uses either '.' or '_' in front of 'allOGCleanresults';
    # keep whichever separator is present when building the XML file name.
    sep = '.' if '.allOGCleanresults' in args.input_TSV else '_'
    tsv_name = args.input_TSV.split('/')[-1]
    args.out_XML = args.name+'_XX_'+tsv_name.replace(sep+'allOGCleanresults.', sep+'AA.ORF.')\
        .replace('.tsv','.fasta')+'_1e-10keepall_BlastOutall.oneHit'

    args.file_prefix = args.input_file.split('/')[-1].split('_Filtered.Final')[0]
    if 'fasta' in args.file_prefix:
        # No '_Filtered.Final' marker in the file name: fall back to the new code.
        args.file_prefix = args.name

    ready_to_go = args.all_output_folder + '/ReadyToGo/'
    args.r2g_aa = ready_to_go + 'ReadyToGo_AA/'
    args.r2g_ntd = ready_to_go + 'ReadyToGo_NTD/'
    args.r2g_tsv = ready_to_go + 'ReadyToGo_TSV/'
    args.r2g_xml = ready_to_go + 'ReadyToGo_XML/'

    return args


###########################################################################################
###------------------------------- Script Usage Message --------------------------------###
###########################################################################################

def usage_msg():
    """Return the coloured example command line shown in the script description."""
    return (color.BOLD+color.RED+'\n\nExample usage:'+color.CYAN+' python 7_FinalizeName.py'\
    ' --input_file ../ToRename/Op_me_Xxma_Filtered.Final.AA.ORF.fasta --name Op_me_Xxma'+color.END)

##########################################################################################
###-------- Storage for LARGE (Annoying) Print Statements for Flagged Options ---------###
##########################################################################################

# File-name endings that identify the protein and nucleotide ORF Fasta files.
_AA_SUFFIX = 'AA.ORF.fasta'
_NTD_SUFFIX = 'NTD.ORF.fasta'
_TSV_SUFFIX = 'allOGCleanresults.tsv'


def _report_missing_file(label, path):
    # Prints the "does not exist" error for one of the three expected input files.
    print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The provided '+label
        +' file ('+color.DARKCYAN+path.split('/')[-1]+color.END+color.BOLD+')\ndoes not'
        +' exist or is incorrectly formatted.\n\nDouble-check then try again!\n\n'+color.END)


def return_more_info(args):
    """Derive the companion input paths and count problems with the arguments.

    ``args.input_file`` must end in ``AA.ORF.fasta`` or ``NTD.ORF.fasta``.  From
    it this function sets ``args.input_AA``, ``args.input_NTD`` and
    ``args.input_TSV`` (same location, same stem, ``allOGCleanresults.tsv``
    ending) and checks that all three files exist.

    Returns the number of reasons the caller should stop: one for ``-author``
    (after printing the contact details), one for missing required options or
    an unrecognised input-file ending, and one per file that does not exist.
    A return value of 0 means the run can continue.
    """

    valid_args = 0

    if args.author:
        print (color.BOLD+color.ORANGE+'\n\n\tQuestions/Comments? Email Xyrus (author) at'
            +' maurerax@gmail.com\n\n'+color.END)
        valid_args += 1

    if not args.input_file or not args.name:
        # Nothing to resolve.  When only -author was requested this is not an
        # error; otherwise tell the user what is missing.
        if not args.author:
            print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' Both --input_file'
                +' and --name must be provided.\n\n'+color.END)
            valid_args += 1
        return valid_args

    if args.input_file.endswith(_AA_SUFFIX):
        stem = args.input_file[:-len(_AA_SUFFIX)]
    elif args.input_file.endswith(_NTD_SUFFIX):
        stem = args.input_file[:-len(_NTD_SUFFIX)]
    else:
        print (color.BOLD+color.RED+'\nError:'+color.END+color.BOLD+' The input file ('
            +color.DARKCYAN+args.input_file.split('/')[-1]+color.END+color.BOLD+')\nmust end'
            +' with '+_AA_SUFFIX+' or '+_NTD_SUFFIX+'.\n\n'+color.END)
        return valid_args + 1

    # Build all three paths from the shared stem so that the TSV is found
    # whichever of the two Fasta files was given on the command line.
    args.input_AA = stem + _AA_SUFFIX
    args.input_NTD = stem + _NTD_SUFFIX
    args.input_TSV = stem + _TSV_SUFFIX

    expected_files = (('Nucleotide Fasta', args.input_NTD),
                      ('Protein Fasta', args.input_AA),
                      ('TSV ', args.input_TSV))
    for label, path in expected_files:
        if not os.path.isfile(path):
            _report_missing_file(label, path)
            valid_args += 1

    return valid_args

###########################################################################################
###-------------------- Double Checks Format for 10-Character Code ---------------------###
###########################################################################################

def check_code(args):
    """Exit unless ``args.name`` is a 10-character code shaped like ``Xx_xx_Xxxx``.

    The code must be exactly 10 characters long and split on ``_`` into three
    parts of length 2, 2 and 4.  On success a message announcing the rename is
    printed; otherwise an explanation with three example codes is printed and
    the script exits.
    """

    if len(args.name) != 10:
        problem = '\n\nNew Species Prefix is not 10 characters long\n\n'
    elif [len(part) for part in args.name.split('_')] != [2, 2, 4]:
        # Covers both a wrong number of underscores and wrongly sized parts.
        problem = '\n\nCheck the format of your Species Prefix!\n\n'
    else:
        problem = None

    if problem is not None:
        print (color.BOLD+problem)
        print ('Three examples below:\n'+color.CYAN+'\n\tSr_ci_Cunc\n\n\tOp_me_Hsap\n\n\t'\
            'Am_ar_Ehis\n\n'+color.END)
        sys.exit()

    print (color.BOLD+"\n\nRenaming "+color.ORANGE+args.input_file.split('/')[-1]\
        .split('_Filtered')[0]+color.END+color.BOLD+"'s files with the following 10-character\n"\
        "code: "+color.CYAN+args.name+color.END+'\n')


##########################################################################################
###------------------------- Creates Folders For Storing Data -------------------------###
+########################################################################################## + +def prep_folders(args): + + + if os.path.isdir(args.all_output_folder + '/ReadyToGo/') != True: + os.system('mkdir ' + args.all_output_folder + '/ReadyToGo') + + + if os.path.isdir(args.r2g_ntd) != True: + os.system('mkdir ' + args.r2g_ntd) + if os.path.isdir(args.r2g_aa) != True: + os.system('mkdir ' + args.r2g_aa) + if os.path.isdir(args.r2g_tsv) != True: + os.system('mkdir ' + args.r2g_tsv) + if os.path.isdir(args.r2g_xml) != True: + os.system('mkdir ' + args.r2g_xml) + + if os.path.isdir(args.all_output_folder + '/' + args.file_prefix + '/Renamed') != True: + os.system('mkdir ' + args.all_output_folder + '/' + args.file_prefix + '/Renamed') + +########################################################################################### +###----------- Renames the NTD and AA CDSs with the Given 10-Character Code ------------### +########################################################################################### + +def rename_paralogs(args): + + home_folder = args.all_output_folder + '/' + args.file_prefix + '/Renamed/' + + print (color.BOLD+'\nRenaming Translated (Protein) '+color.PURPLE+'ORFs\n'+color.END) + renamed_Final_Prots = open(args.input_AA).read().replace('>','>'+args.name+'_XX_') + + print (color.BOLD+'\nRenaming Nucleotide '+color.PURPLE+'ORFs\n'+color.END) + renamed_Final_Nucs = open(args.input_NTD).read().replace('>','>'+args.name+'_XX_') + + + print (color.BOLD+'\nUpdating CDS Names in the Spreadsheet'+color.END) + if '\n\n' in open(args.input_TSV).read(): + renamed_Final_tsv = args.name+'_XX_'+open(args.input_TSV).read().rstrip('\n')\ + .replace('\n\n','\n'+args.name+'_XX_') + else: + renamed_Final_tsv = args.name+'_XX_'+open(args.input_TSV).read().rstrip('\n')\ + .replace('\n','\n'+args.name+'_XX_') + + with open(home_folder+args.name+'_XX_'+args.input_AA.split('/')[-1],'w+') as w: + w.write(renamed_Final_Prots) + + with 
open(home_folder+args.name+'_XX_'+args.input_NTD.split('/')[-1],'w+') as x: + x.write(renamed_Final_Nucs) + + + with open(home_folder+args.name+'_XX_'+args.input_TSV.split('/')[-1],'w+') as y: + y.write(renamed_Final_tsv) + + +########################################################################################### +###--------------------------------- Header/Tail Lines ---------------------------------### +########################################################################################### + +def header_tail(): + header = '\n\n'\ + '\n blastp\n BLASTP 2.2.29+\n'\ + ' Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.\n'\ + ' ../OGBlastDB/renamed_aa_seqs_OrthoMCL-5_12653.fasta\n Query_1\n' + + tail = '\n' + return header, tail + + +########################################################################################### +###------------------------------- TSV to XML Conversion -------------------------------### +########################################################################################### + +def convert_TSV_data(args): + + home_folder = args.all_output_folder + '/' + args.file_prefix + '/Renamed/' + + TSVforConvert = home_folder+args.name+'_XX_'+args.input_TSV.split('/')[-1] + + inTSV = [line.rstrip('\n') for line in open(TSVforConvert).readlines() if line != '\n'] + + iterations = [] + + for n in range(len(inTSV)): + if n == 0: + iterations.append(' '+inTSV[n].split('\t')[0]+'\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])+1))+'\n'\ + ' \n \n BLOSUM62\n 1e-10\n'\ + ' 11\n 1\n F\n'\ + ' \n \n\n\n 1\n Query_1\n'\ + ' '+inTSV[n].split('\t')[0]+'\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])+1))+'\n'\ + '\n\n 1\n Fake_Entry\n '+inTSV[n].split('\t')[1]+'\n Fake_Accession\n'\ + ' 
'+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])+1))+'\n \n \n 1\n 1234\n'\ + ' '+inTSV[n].split('\t')[-1]+'\n '+inTSV[n].split('\t')[-2]+'\n '+inTSV[n].split('\t')[-4]+'\n'\ + ' '+inTSV[n].split('\t')[-3]+'\n '+inTSV[n].split('\t')[-4]+'\n '+inTSV[n].split('\t')[-3]+'\n'\ + ' 0\n 0\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n'\ + ' '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n 0\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n'\ + ' \n \n \n \n \n\n'\ + '\n\n \n \n 379660\n 197499634\n'\ + ' 123\n 184705217500\n 0.041\n'\ + ' 0.267\n 0.14\n \n \n\n') + else: + iterations.append('\n '+str(n+1)+'\n Query_'+str(n+1)+'\n'\ + ' '+inTSV[n].split('\t')[0]+'\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])+1))+'\n'\ + '\n\n 1\n Fake_Entry\n '+inTSV[n].split('\t')[1]+'\n Fake_Accession\n'\ + ' '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])+1))+'\n \n \n 1\n 1234\n'\ + ' '+inTSV[n].split('\t')[-1]+'\n '+inTSV[n].split('\t')[-2]+'\n '+inTSV[n].split('\t')[-4]+'\n'\ + ' '+inTSV[n].split('\t')[-3]+'\n '+inTSV[n].split('\t')[-4]+'\n '+inTSV[n].split('\t')[-3]+'\n'\ + ' 0\n 0\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n'\ + ' '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n 0\n '+str(abs(int(inTSV[n].split('\t')[-3])-int(inTSV[n].split('\t')[-4])))+'\n'\ + ' \n \n \n \n \n\n'\ + '\n\n \n \n 379660\n 197499634\n'\ + ' 123\n 184705217500\n 0.041\n'\ + ' 0.267\n 0.14\n \n \n\n') + + return iterations + + +########################################################################################### +###--------------------------- Writes Out the Fake XML File ----------------------------### +########################################################################################### + +def write_Fake_XML(args): + + home_folder = args.all_output_folder + '/' + 
args.file_prefix + '/' + + print (color.BOLD+'\n\nConverting '+color.ORANGE+args.input_file.split('/')[-1]+color.END\ + +color.BOLD+' to XML format\n'+color.END) + + header, tail = header_tail() + + iterations = convert_TSV_data(args) + + with open(home_folder+args.out_XML,'w+') as w: + w.write(header) + w.write(''.join(iterations)) + w.write(tail) + +########################################################################################## +###-------------------- Cleans up the Folder and Moves Final Files --------------------### +########################################################################################## +def clean_up(args): + + home_folder = args.all_output_folder + '/' + args.file_prefix + '/Renamed/' + + os.system('cp ' + args.all_output_folder + '/' + args.file_prefix+'/'+args.out_XML+' '+args.r2g_xml) + + os.system('cp '+home_folder+'*tsv '+args.r2g_tsv) + + os.system('cp '+home_folder+'*_XX_*AA.ORF.fasta '+args.r2g_aa) + os.system('cp '+home_folder+'*_XX_*NTD.ORF.fasta '+args.r2g_ntd) + + os.system('cp '+home_folder+'*_XX_*tsv ' + args.all_output_folder + '/' + args.file_prefix) + os.system('cp '+home_folder+'*_XX_*AA.ORF.fasta ' + args.all_output_folder + '/' + args.file_prefix) + os.system('cp '+home_folder+'*_XX_*NTD.ORF.fasta ' + args.all_output_folder + '/' + args.file_prefix) + + os.system('rm ' + args.all_output_folder + '/ToRename/*'+args.file_prefix+'*') + + if os.path.isdir(args.all_output_folder + '/Finished/') != True: + os.system('mkdir ' + args.all_output_folder + '/Finished') + + os.system('mv ' + args.all_output_folder + '/' + args.file_prefix + ' ' + args.all_output_folder + '/Finished') + +########################################################################################### +###-------------------------------- Next Script Message --------------------------------### +########################################################################################### + +def next_script(args): + + print (color.BOLD+'\nThere is no 
next script! The final '+color.ORANGE+args.out_XML\ + .split('_XX')[0]+color.END+color.BOLD+' files can be\nfound in the '+color.RED+\ + args.out_XML.split('_XX_')[-1].split('_Filtered')[0]+color.END+color.BOLD+' and '\ + +color.RED+'ReadyToGo folders'+color.END+color.BOLD+' and are ready\n'\ + 'for the KatzLab Phylogenomic Tree-Building Steps!\n\n'+color.END) + +########################################################################################## +###--------------- Checks Command Line Arguments and Calls on Functions ---------------### +########################################################################################## + +def main(): + + args = check_args() + + check_code(args) + + prep_folders(args) + + rename_paralogs(args) + + write_Fake_XML(args) + + clean_up(args) + + next_script(args) + +main() \ No newline at end of file diff --git a/PTL1/Transcriptomes/Scripts/8_SummaryStats.py b/PTL1/Transcriptomes/Scripts/8_SummaryStats.py new file mode 100644 index 0000000..c356438 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/8_SummaryStats.py @@ -0,0 +1,269 @@ +import os, sys +import argparse +from Bio import SeqIO +import CUB +from statistics import mean +from math import ceil, floor +from tqdm import tqdm +import matplotlib.pyplot as plt +import numpy as np + + +def get_args(): + + parser = argparse.ArgumentParser( + prog = 'PTL6p1 Script 8: Stat Summary', + description = "Updated March 31th, 2023 by Auden Cote-L'Heureux" + ) + + parser.add_argument('-i', '--input', type = str, required = True, help = 'Input path to the "Output" folder produced by PhyloToL Part 1. This folder should contain both the "ReadyToGO" and "Intermediate" folders.') + parser.add_argument('-d', '--databases', type = str, default = '../Databases', help = 'Path to databases folder') + parser.add_argument('-r', '--r2g_jf', action = 'store_true', help = 'Create ReadyToGo files filtered to only include sequences between the 25th and 75th percentile of silent-site GC content. 
Please be aware that these are not necessarily the correct or non-contaminant sequences; examine the GC3xENc plots carefully before using these data.') + + return parser.parse_args() + + +def hook_lens(args): + + print('\nGetting average OG lengths in the Hook DB...') + + len_by_og = { } + for file in os.listdir(args.databases + '/db_OG'): + if file.endswith('.fasta') and os.path.isfile(args.databases + '/db_OG/' + file.replace('.fasta', '.dmnd')): + for rec in tqdm(SeqIO.parse(args.databases + '/db_OG/' + file, 'fasta')): + if rec.id[-10:] not in len_by_og: + len_by_og.update({ rec.id[-10:] : [] }) + + len_by_og[rec.id[-10:]].append(len(str(rec.seq))) + + for og in len_by_og: + len_by_og[og] = mean(len_by_og[og]) + + return len_by_og + + +def aa_comp_lengths(args, gcodes): + + print('\nGetting amino acid composition data from ReadyToGo files...') + + r2g_lengths = { }; aa_comp = { }; recid_by_contig_n = { } + for file in tqdm([f for f in os.listdir(args.input + '/ReadyToGo/ReadyToGo_AA')]): + if file.endswith('.fasta') and file[:10] in gcodes: + for rec in SeqIO.parse(args.input + '/ReadyToGo/ReadyToGo_AA/' + file, 'fasta'): + r2g_lengths.update({ rec.id : len(str(rec.seq)) * 3 }) + + fymink = 0; garp = 0; other = 0; total = 0 + for char in str(rec.seq): + if char in 'FYMINK': + fymink += 1 + elif char in 'GARP': + garp += 1 + else: + other += 1 + + total += 1 + + aa_comp.update({ rec.id : { 'FYMINK' : fymink/total, 'GARP' : garp/total, 'Other' : other/total } }) + + recid_by_contig_n.update({ rec.id.split('Contig_')[-1].split('_')[0] : rec.id }) + + print('\nGetting transcript sequence data from original assembled transcript files...') + + transcripts = { }; transcript_id_corr = { } + for tax in tqdm([f for f in os.listdir(args.input + '/Intermediate/TranslatedTranscriptomes')]): + if os.path.isdir(args.input + '/Intermediate/TranslatedTranscriptomes/' + tax + '/OriginalFasta'): + for file in os.listdir(args.input + '/Intermediate/TranslatedTranscriptomes/' + tax 
+ '/OriginalFasta'): + if file.endswith('Original.fasta') and file[:10] in gcodes: + for rec in SeqIO.parse(args.input + '/Intermediate/TranslatedTranscriptomes/' + tax + '/OriginalFasta/' + file, 'fasta'): + transcripts.update({ rec.id : (file[:10], str(rec.seq)) }) + if rec.id.split('NODE_')[-1].split('_')[0] in recid_by_contig_n: + transcript_id_corr.update({ recid_by_contig_n[rec.id.split('NODE_')[-1].split('_')[0]] : rec.id}) + + return aa_comp, transcripts, r2g_lengths, transcript_id_corr + + +def get_nuc_comp(args, gcodes): + + print('\nGetting nucleotide composition data from ReadyToGo files...') + + nuc_comp = { } + for file in tqdm([f for f in os.listdir(args.input + '/ReadyToGo/ReadyToGo_NTD')]): + if file.endswith('.fasta') and file[:10] in gcodes: + cub_out = CUB.CalcRefFasta(args.input + '/ReadyToGo/ReadyToGo_NTD/' + file, gcodes[file[:10]])[0] + for k in cub_out: + nuc_comp.update({ k : cub_out[k] }) + + return nuc_comp + + +def per_seq(args, nuc_comp, aa_comp, all_transcripts, r2g_lengths, transcript_id_corr): + + og_mean_lens = hook_lens(args) + + if not os.path.isdir(args.input + '/PerSequenceStatSummaries'): + os.mkdir(args.input + '/PerSequenceStatSummaries') + + taxa = list(dict.fromkeys([seq[:10] for seq in nuc_comp])) + + for taxon in taxa: + with open(args.input + '/PerSequenceStatSummaries/' + taxon + '.csv', 'w') as o: + o.write('Sequence,Taxon,OG,Transcript,TranscriptLength,CDSLength,AvgLengthOGinHook,AmbiguousCodons,GC-Overall,GC1,GC2,GC3,GC3-Degen,ExpWrightENc,ObsWrightENc_6Fold,ObsWrightENc_No6Fold,ObsWeightedENc_6Fold,ObsWeightedENc_No6Fold,FYMINK,GARP,OtherAA\n') + for rec in nuc_comp: + if rec[:10] == taxon: + o.write(rec + ',' + rec[:10] + ',' + rec[-10:]) + + try: + o.write(',' + transcript_id_corr[rec] + ',' + str(len(all_transcripts[transcript_id_corr[rec]][1]))) + except KeyError: + o.write(',NA,NA') + + o.write(',' + str(r2g_lengths[rec]) + ',' + str(og_mean_lens[rec[-10:]])) + + v = nuc_comp[rec] + gcs = [str(v.gcOverall), 
str(v.gc1), str(v.gc2), str(v.gc3), str(v.gc4F)] + ENc = [str(v.expENc), str(v.obsENc_6F), str(v.obsENc_No6F), str(v.SunENc_6F),str(v.SunENc_No6F)] + o.write(',' + ','.join([str(v.amb_cdn)] + gcs + ENc)) + + o.write(',' + str(aa_comp[rec]['FYMINK']) + ',' + str(aa_comp[rec]['GARP']) + ',' + str(aa_comp[rec]['Other']) + '\n') + + +def per_tax(args, nuc_comp, aa_comp, all_transcripts, r2g_lengths, gcodes): + + taxa = list(dict.fromkeys([seq[:10] for seq in nuc_comp])) + + with open(args.input + '/PerTaxonSummary.csv', 'w') as o: + o.write('Taxon,TranscriptsInput,Median_GCTranscripts,IQR_GCTranscripts,Median_LenTranscripts,IRQ_LenTranscripts,SeqsR2G,OGsR2G,Median_GC3R2G,IQR_GC3R2G,Median_ENcR2G,IQR_ENcR2G,Median_LenR2G,IQR_LenR2G,GeneticCode\n') + + for taxon in taxa: + o.write(taxon) + + transcripts = [all_transcripts[seq][1].upper() for seq in all_transcripts if all_transcripts[seq][0] == taxon] + o.write(',' + str(len(transcripts))) + + transcript_gcs = [] + for transcript in transcripts: + transcript_gcs.append((transcript.count('G') + transcript.count('C'))/len(transcript)) + + transcript_gcs = sorted(transcript_gcs) + o.write(',' + str(transcript_gcs[floor(len(transcripts)*0.5)])) + o.write(',' + str(transcript_gcs[floor(len(transcripts)*0.75)] - transcript_gcs[floor(len(transcripts)*0.25)])) + + transcript_lens = sorted([len(transcript) for transcript in transcripts]) + o.write(',' + str(transcript_lens[floor(len(transcripts)*0.5)])) + o.write(',' + str(transcript_lens[floor(len(transcripts)*0.75)] - transcript_lens[floor(len(transcripts)*0.25)])) + + r2g_ntds = [nuc_comp[seq] for seq in nuc_comp if seq[:10] == taxon] + o.write(',' + str(len(r2g_ntds))) + r2g_ogs = list(dict.fromkeys([seq[-10:] for seq in nuc_comp if seq[:10] == taxon])) + o.write(',' + str(len(r2g_ogs))) + + r2g_gc3s = sorted([seq.gc4F for seq in r2g_ntds]) + o.write(',' + str(r2g_gc3s[floor(len(r2g_ntds)*0.5)])) + o.write(',' + str(r2g_gc3s[floor(len(r2g_gc3s)*0.75)] - 
r2g_gc3s[floor(len(r2g_gc3s)*0.25)])) + + r2g_encs = sorted([seq.obsENc_6F for seq in r2g_ntds]) + o.write(',' + str(r2g_encs[floor(len(r2g_encs)*0.5)])) + o.write(',' + str(r2g_encs[floor(len(r2g_encs)*0.75)] - r2g_encs[floor(len(r2g_encs)*0.25)])) + + tax_r2g_lens = sorted([r2g_lengths[seq] for seq in r2g_lengths if seq[:10] == taxon]) + o.write(',' + str(tax_r2g_lens[floor(len(tax_r2g_lens)*0.5)])) + o.write(',' + str(tax_r2g_lens[floor(len(tax_r2g_lens)*0.75)] - tax_r2g_lens[floor(len(tax_r2g_lens)*0.25)])) + + o.write(',' + gcodes[taxon] + '\n') + + +def r2g_jf(args, nuc_comp, gcodes): + + #Q: should there be an maximum IQR cutoff at which we do NOT produce a file here? + + if not os.path.isdir(args.input + '/ReadyToGo/ReadyToGo_NTD_JF'): + os.mkdir(args.input + '/ReadyToGo/ReadyToGo_NTD_JF') + + if not os.path.isdir(args.input + '/ReadyToGo/ReadyToGo_AA_JF'): + os.mkdir(args.input + '/ReadyToGo/ReadyToGo_AA_JF') + + for file in os.listdir(args.input + '/ReadyToGo/ReadyToGo_NTD'): + if file.endswith('.fasta') and file[:10] in gcodes: + taxon = file[:10] + + r2g_ntds = [nuc_comp[seq] for seq in nuc_comp if seq[:10] == taxon] + r2g_gc3s = sorted([seq.gc4F for seq in r2g_ntds]) + + with open(args.input + '/ReadyToGo/ReadyToGo_NTD_JF/' + file.replace('.fasta', '.JF.fasta'), 'w') as o: + for rec in SeqIO.parse(args.input + '/ReadyToGo/ReadyToGo_NTD/' + file, 'fasta'): + if nuc_comp[rec.id].gc4F > r2g_gc3s[floor(len(r2g_gc3s)*0.25)] and nuc_comp[rec.id].gc4F < r2g_gc3s[floor(len(r2g_gc3s)*0.75)]: + o.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n') + + with open(args.input + '/ReadyToGo/ReadyToGo_AA_JF/' + file.replace('.fasta', '.JF.fasta').replace('NTD', 'AA'), 'w') as o: + for rec in SeqIO.parse(args.input + '/ReadyToGo/ReadyToGo_AA/' + file.replace('NTD', 'AA'), 'fasta'): + if nuc_comp[rec.id].gc4F > r2g_gc3s[floor(len(r2g_gc3s)*0.25)] and nuc_comp[rec.id].gc4F < r2g_gc3s[floor(len(r2g_gc3s)*0.75)]: + o.write('>' + rec.id + '\n' + str(rec.seq) + '\n\n') + + 
+def plot_jf(args, nuc_comp): + + if not os.path.isdir(args.input + '/GC3xENc_Plots'): + os.mkdir(args.input + '/GC3xENc_Plots') + + taxa = list(dict.fromkeys([rec[:10] for rec in nuc_comp])) + + gc3_null = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] + enc_null = [31, 31.5958, 32.2032, 32.8221, 33.4525, 34.0942, 34.7471, 35.411, 36.0856, 36.7707, 37.4659, 38.1707, 38.8847, 39.6074, 40.3381, 41.0762, 41.8208, 42.5712, 43.3264, 44.0854, 44.8471, 45.6102, 46.3735, 47.1355, 47.8949, 48.65, 49.3991, 50.1406, 50.8725, 51.593, 52.3, 52.9916, 53.6656, 54.32, 54.9525, 55.561, 56.1434, 56.6975, 57.2211, 57.7124, 58.1692, 58.5898, 58.9723, 59.3151, 59.6167, 59.8757, 60.0912, 60.2619, 60.3873, 60.4668, 60.5, 60.4668, 60.3873, 60.2619, 60.0912, 59.8757, 59.6167, 59.3151, 58.9723, 58.5898, 58.1692, 57.7124, 57.2211, 56.6975, 56.1434, 55.561, 54.9525, 54.32, 53.6656, 52.9916, 52.3, 51.593, 50.8725, 50.1406, 49.3991, 48.65, 47.8949, 47.1355, 46.3735, 45.6102, 44.8471, 44.0854, 43.3264, 42.5712, 41.8208, 41.0762, 40.3381, 39.6074, 38.8847, 38.1707, 37.4659, 36.7707, 36.0856, 35.411, 34.7471, 34.0942, 33.4525, 32.8221, 32.2032, 31.5958, 31] + + for taxon in taxa: + comp_data = [(nuc_comp[rec].gc4F, nuc_comp[rec].obsENc_6F) for rec in nuc_comp if rec[:10] == taxon] + + plt.figure() + plt.plot(np.array(gc3_null), np.array(enc_null), color = 'black', linewidth=2) + plt.scatter(np.array([val[0] for val in comp_data]), np.array([val[1] for val in comp_data]), s = 1) + plt.xlabel("GC content (3rd pos, 4-fold sites)") + plt.ylabel("Observed Wright ENc (6 Fold)") + plt.savefig(args.input + '/GC3xENc_Plots/' + taxon + '.png') + +if __name__ == 
"__main__": + args = get_args() + + valid_codes = ['universal', 'blepharisma', 'chilodonella', 'condylostoma', 'euplotes', 'peritrich', 'vorticella', 'mesodinium', 'tag', 'tga', 'taa', 'none'] + + gcodes = { } + if os.path.isfile(args.input + '/Intermediate/gcode_output.tsv'): + for line in open(args.input + '/Intermediate/gcode_output.tsv'): + if len(line.split('\t')) == 5 and line.split('\t')[4].strip().lower() in valid_codes: + gcodes.update({ line.split('\t')[0] : line.split('\t')[4].strip() }) + elif line.split('\t')[4].strip().lower() != '': + print('\nInvalid genetic code assignment for taxon ' + line.split('\t')[0] + '. Skipping this taxon in script 8 (summary statistics)\n') + else: + print('\nGenetic code assignment file (Output/Intermediate/gcode_output.tsv) not found. Quitting script 8 (summary statistics).\n') + exit() + + aa_comp, transcripts, r2g_lengths, transcript_id_corr = aa_comp_lengths(args, gcodes) + nuc_comp = get_nuc_comp(args, gcodes) + + per_tax(args, nuc_comp, aa_comp, transcripts, r2g_lengths, gcodes) + per_seq(args, nuc_comp, aa_comp, transcripts, r2g_lengths, transcript_id_corr) + + if args.r2g_jf: + r2g_jf(args, nuc_comp, gcodes) + + plot_jf(args, nuc_comp) + + + + + + + + + + + + + + + + + + + + + diff --git a/PTL1/Transcriptomes/Scripts/CUB.py b/PTL1/Transcriptomes/Scripts/CUB.py new file mode 100644 index 0000000..382e218 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/CUB.py @@ -0,0 +1,523 @@ +#!/usr/bin/env python3 +# coding=utf-8 + +'''Aim of this script is to generate lots of codon usage statistics to aid in +identifying useful characteristics for de novo ORF calling''' + +# Author: Xyrus Maurer-Alcalá +# Contact: maurerax@gmail.com or xyrus.maurer-alcala@izb.unibe.ch +# Last Modified: 2020-09-17 +# usage: python CUB.py + +# Dependencies: +# Python3, numpy, BioPython + +import os +import re +import sys +#import matplotlib.pyplot as plt +import numpy as np +#import seaborn as sns + +from Bio import SeqIO +from Bio.Seq import Seq 
+from Bio.SeqUtils import GC + + +class CalcCUB: + """ + Returns the Effective Number of Codons used (observed and expected) + following the equations originally from Wright 1990. + """ + def expWrightENc(gc3): + # Calculates the expected ENc from a sequence's GC3 under Wright 1990 + if gc3 > 1: + # If GC3 looks as though it is > 1 (e.g. 100%), converts to a float ≤ 1. + # Calculations expect a value between 0 and 1 + gc3 = gc3/100 + exp_enc = 2+gc3+(29/((gc3**2)+(1-gc3)**2)) + return round(exp_enc, 4) + + def nullENcGC3(): + # Calculates the expected ENc from the null distribution of GC3 + # values (0, 100% GC) + null = [CalcCUB.expWrightENc(n) for n in np.arange(0,.51,0.01)] + null += null[:-1][::-1] + return [str(i)+'\t'+str(j) for i, j in zip([n for n in range(0, 101)],null)] + + + def calcWrightENc(cdnTable): + # Follows Wright's (1990) calculations for determining ENc scores. + + def faCalcWright(aa_counts): + # Returns the codon homozygosity (fa) for a given "type" of AA (e.g. + # 2-fold degeneracy). + counts = [i[2] for i in aa_counts] + # n_aa --> number of this particular AA + n_aa = sum(counts) + # fa --> codon homozygosity + try: + fa = (((n_aa*sum([(i/float(n_aa))**2 for i in counts]))-1)/(n_aa-1)) + except: + fa = 0 + return fa + + def ENcWright_by_Degen(fa_data): + # Same as used in Wright 1990, averages the homozygosity across all codons + # of a given class (e.g. 2-fold degeneracy) + + # Codons without any degeneracy (e.g. ATG == M) have 100% homozygosity + # and provide a "base" for the ENc score + enc = 2 + for k, v in fa_data.items(): + non_zero_vals, non_zero_sum = len([i for i in v if i != 0]), sum([i for i in v if i != 0]) + try: + f_aa = non_zero_sum/non_zero_vals + except: + f_aa = 1 + enc += k/f_aa + return enc + + # Determines the number of degenerate groups to use (i.e. whether 6-Fold + # degeneracy is present). 
+ degen_cdns = {} + for k, v in cdnTable.items(): + if v[1] not in degen_cdns.keys(): + degen_cdns[v[1]] = [v[0]] + else: + if v[0] not in degen_cdns[v[1]]: + degen_cdns[v[1]] += [v[0]] + + # Calculates codon homozygosity (fa) for each amino acid. Groups the + # resulting values based on the amino acids degeneracy (e.g. 'two-fold'). + fa_cdns = {len(v):[] for k, v in degen_cdns.items() if 'one' not in k} + + for k, v in degen_cdns.items(): + # Skip codons lacking degeneracy + if 'one' in k: + continue + + for aa in v: + aa_counts = [cdnTable[k] for k in cdnTable.keys() if cdnTable[k][0] == aa] + fa_cdns[len(v)] += [faCalcWright(aa_counts)] + enc_val = min(61, round(ENcWright_by_Degen(fa_cdns),4)) + return enc_val + + def SunEq5(cdnTable): + def calcFcf(aa_counts): + counts = [i[2] for i in aa_counts] + pseudocounts = [i+1 for i in counts] + na = sum(pseudocounts) + fcf = sum([(i/float(na))**2 for i in pseudocounts]), sum(pseudocounts) + return fcf + + ENcWeightedPsuedo = 0 + degen_cdns = {} + + for k, v in cdnTable.items(): + if v[1] == 'none': + continue + if v[1] not in degen_cdns.keys(): + degen_cdns[v[1]] = [v[0]] + else: + if v[0] not in degen_cdns[v[1]]: + degen_cdns[v[1]] += [v[0]] + for k, v in degen_cdns.items(): + fcf_nc = [] + for aa in v: + aa_counts = [cdnTable[k] for k in cdnTable.keys() if cdnTable[k][0] == aa] + fcf_nc.append(calcFcf(aa_counts)) + weightedENc = (len(fcf_nc) / + (sum([i[0]*i[1] for i in fcf_nc]) / + sum([i[1] for i in fcf_nc]))) + ENcWeightedPsuedo += weightedENc + return round(ENcWeightedPsuedo,4) + + def calcRCSU(cdnTbl): + rscu = {k:[v[0]] for k, v in cdnTbl.items() if v[0].isalpha()} + for k, v in rscu.items(): + try: + aa_info = [(key, val[-1]) for key, val in cdnTbl.items() if val[0] == v[0]] + aa_cnts = [x[1] for x in aa_info] + cdn_rscu = (cdnTbl[k][-1]*len(aa_cnts))/sum(aa_cnts) + rscu[k] += [str(round(cdn_rscu,4))] + except: + rscu[k] += ['0.0'] + return rscu + + +class GenUtil(object): + """ + "Overflow" of functions for 
class GenUtil(object):
    """
    "Overflow" of helper functions, kept together to make the code a little
    cleaner/easier to manage.

    This class includes means to normalize/check the user-provided genetic
    code, which if not valid will default to the "universal" genetic code.

    Similarly, this class returns the appropriate codon count tables and
    provides a function to update their values.

    All functions are used through the class (``GenUtil.mapCdns(...)``), so
    they are declared as static methods.
    """

    @staticmethod
    def convertGenCode(gCode):
        """Validate a user-supplied genetic code.

        Returns ``(gCode, table_key)`` where ``table_key`` is the key accepted
        by ``getCDNtable``.  Matching is case-insensitive; the returned
        ``gCode`` is the value exactly as supplied.  An unsupported code
        prints a warning and falls back to ``('Universal', 1)``.
        """
        # Dictionary of the possible/functional genetic codes that are
        # supported.  --- Chilodonella and condylostoma are to come!
        transTable = {'universal':1, 'blepharisma':4,
            'ciliate':6, 'euplotes':10, 'mesodinium':29, 'myrionecta':29, 'peritrich':30,
            '1':1, '4':4, '6':6, '10':10, '29':29, '30':30, 'chilo':'chilo'}

        key = str(gCode).lower()
        if key not in transTable:
            print("\nWarning: Provided genetic code is not supported (yet).\n")
            print("Currently running using the UNIVERSAL genetic code.\n\n")
            print("Alternative genetic codes are as follows (Note: numbers "\
                "correspond to NCBI genetic code tables):\n")
            print('\n'.join(list(transTable.keys()))+'\n')
            return 'Universal',1
        return gCode,transTable[key]

    @staticmethod
    def getCDNtable(gCode):
        """Return fresh codon-count tables for a genetic code.

        ``gCode`` is a table key (1, 4, 6, 10, 29, 30, 'chilo' or
        'chilodonella'); any other key raises ``KeyError``.

        Returns ``[no6fold_table, sixfold_table]``.  Each table maps
        ``codon -> [amino_acid_label, degeneracy_label, count]`` with every
        count starting at 0.  In the "no6fold" variant the six-fold families
        are split into a four-fold and a two-fold group, the latter labelled
        with a trailing underscore (``'R_'``, ``'L_'``, ``'S_'``).
        """
        def build(groups):
            # Expand (label, degeneracy, 'codon codon ...') rows into
            # {codon: [label, degeneracy, 0]}.  Row order fixes the dict
            # order, which downstream code relies on when iterating.
            return {cdn: [aa, degen, 0]
                    for aa, degen, cdns in groups
                    for cdn in cdns.split()}

        # Universal codon table, with 6-fold degenerate codons split
        # into four-fold and two-fold groups.
        universal_no6fold = build([
            ('A', 'four', 'GCT GCC GCA GCG'),
            ('R', 'four', 'CGT CGC CGG CGA'),
            ('R_', 'two', 'AGA AGG'),
            ('N', 'two', 'AAT AAC'),
            ('D', 'two', 'GAT GAC'),
            ('C', 'two', 'TGT TGC'),
            ('Q', 'two', 'CAA CAG'),
            ('E', 'two', 'GAA GAG'),
            ('G', 'four', 'GGT GGC GGA GGG'),
            ('H', 'two', 'CAT CAC'),
            ('I', 'three', 'ATT ATC ATA'),
            ('M', 'one', 'ATG'),
            ('L_', 'two', 'TTA TTG'),
            ('L', 'four', 'CTT CTC CTA CTG'),
            ('K', 'two', 'AAA AAG'),
            ('F', 'two', 'TTT TTC'),
            ('P', 'four', 'CCT CCC CCA CCG'),
            ('S', 'four', 'TCT TCC TCA TCG'),
            ('S_', 'two', 'AGT AGC'),
            ('T', 'four', 'ACT ACC ACA ACG'),
            ('W', 'one', 'TGG'),
            ('Y', 'two', 'TAT TAC'),
            ('V', 'four', 'GTT GTC GTA GTG'),
            ('*', 'none', 'TAA TGA TAG'),
            ('_missing', 'none', 'XXX'),
        ])

        # Universal codon table, with 6-fold degenerate codons kept
        # whole, no splitting! Traditional Universal codon table.
        universal_6fold = build([
            ('A', 'four', 'GCT GCC GCA GCG'),
            ('R', 'six', 'CGT CGC CGG CGA AGA AGG'),
            ('N', 'two', 'AAT AAC'),
            ('D', 'two', 'GAT GAC'),
            ('C', 'two', 'TGT TGC'),
            ('Q', 'two', 'CAA CAG'),
            ('E', 'two', 'GAA GAG'),
            ('G', 'four', 'GGT GGC GGA GGG'),
            ('H', 'two', 'CAT CAC'),
            ('I', 'three', 'ATT ATC ATA'),
            ('M', 'one', 'ATG'),
            ('L', 'six', 'TTA TTG CTT CTC CTA CTG'),
            ('K', 'two', 'AAA AAG'),
            ('F', 'two', 'TTT TTC'),
            ('P', 'four', 'CCT CCC CCA CCG'),
            ('S', 'six', 'TCT TCC TCA TCG AGT AGC'),
            ('T', 'four', 'ACT ACC ACA ACG'),
            ('W', 'one', 'TGG'),
            ('Y', 'two', 'TAT TAC'),
            ('V', 'four', 'GTT GTC GTA GTG'),
            ('*', 'none', 'TAA TGA TAG'),
            ('_missing', 'none', 'XXX'),
        ])

        # NOTE(review): in the universal tables stop codons carry the 'none'
        # label, but in the alternative codes below they carry 'one'/'two',
        # which makes them visible to degeneracy-based statistics.  Left as
        # found -- confirm this is intended.

        # Blepharisma (table 4) genetic code codon table, with 6-fold
        # degenerate codons kept whole, no splitting!
        blepharisma_6fold = {**universal_6fold,
            'TGA': ['W', 'two', 0], 'TGG': ['W', 'two', 0],
            'TAA': ['*', 'two', 0], 'TAG': ['*', 'two', 0]}

        # Blepharisma (table 4) genetic code codon table, with 6-fold
        # degenerate codons split into four-fold and two-fold groups.
        blepharisma_no6fold = {**universal_no6fold,
            'TGA': ['W', 'two', 0], 'TGG': ['W', 'two', 0],
            'TAA': ['*', 'two', 0], 'TAG': ['*', 'two', 0]}

        # Chilodonella genetic code codon table, with 6-fold degenerate
        # codons kept whole, no splitting!
        chilo_6fold = {**universal_6fold,
            'CAA': ['Q', 'four', 0], 'CAG': ['Q', 'four', 0],
            'TAA': ['*', 'one', 0], 'TAG': ['Q', 'four', 0],
            'TGA': ['Q', 'four', 0]}

        # Chilodonella genetic code codon table, with 6-fold degenerate
        # codons split into four-fold and two-fold groups.
        # Note that this also splits four-fold degenerate codons that OUGHT to
        # be in "different" functional categories (e.g. CAG =/= TAG)
        # NOTE(review): 'Q_' has two codons here (TAG, TGA) but is labelled
        # 'one', unlike the 'two' used for 'Q_' in the ciliate table --
        # confirm before relying on Chilodonella ENc values.
        chilo_no6fold = {**universal_no6fold,
            'TAA': ['*', 'one', 0], 'TAG': ['Q_', 'one', 0],
            'TGA': ['Q_', 'one', 0]}

        # Ciliate (table 6) genetic code codon table, with 6-fold degenerate
        # codons kept whole, no splitting! Traditional ciliate codon table.
        ciliate_6fold = {**universal_6fold,
            'CAA': ['Q', 'four', 0], 'CAG': ['Q', 'four', 0],
            'TAA': ['Q', 'four', 0], 'TAG': ['Q', 'four', 0],
            'TGA': ['*', 'one', 0]}

        # Ciliate (table 6) genetic code codon table, with 6-fold degenerate
        # codons split into four-fold and two-fold groups.
        # Note that this also splits four-fold degenerate codons that OUGHT to
        # be in "different" functional categories (e.g. CAA =/= TAA)
        ciliate_no6fold = {**universal_no6fold,
            'TAA': ['Q_', 'two', 0], 'TAG': ['Q_', 'two', 0],
            'TGA': ['*', 'one', 0]}

        # Euplotes (table 10) genetic code codon table, with 6-fold
        # degenerate codons kept whole, no splitting!
        euplotes_6fold = {**universal_6fold,
            'TGA': ['C', 'three', 0], 'TGT': ['C', 'three', 0],
            'TGC': ['C', 'three', 0], 'TAA': ['*', 'two', 0],
            'TAG': ['*', 'two',0]}

        # Euplotes (table 10) genetic code codon table, with 6-fold
        # degenerate codons split into four-fold and two-fold groups.
        euplotes_no6fold = {**universal_no6fold,
            'TGA': ['C', 'three', 0], 'TGT': ['C', 'three', 0],
            'TGC': ['C', 'three', 0], 'TAA': ['*', 'two', 0],
            'TAG': ['*', 'two',0]}

        # Mesodinium/Myrionecta (table 29) genetic code codon table, with
        # 6-fold degenerate codons kept whole, no splitting!
        mesodinium_6fold = {**universal_6fold,
            'TAA': ['Y', 'four', 0], 'TAT': ['Y', 'four', 0],
            'TAG': ['Y', 'four', 0], 'TAC': ['Y', 'four', 0],
            'TGA': ['*', 'one', 0]}

        # Mesodinium/Myrionecta (table 29) genetic code codon table, with
        # 6-fold degenerate codons split into four-fold and two-fold groups.
        mesodinium_no6fold = {**universal_no6fold,
            'TAA': ['Y', 'four', 0], 'TAT': ['Y', 'four', 0],
            'TAG': ['Y', 'four', 0], 'TAC': ['Y', 'four', 0],
            'TGA': ['*', 'one', 0]}

        # Peritrich (table 30) genetic code codon table, with 6-fold
        # degenerate codons kept whole, no splitting!
        peritrich_6fold = {**universal_6fold,
            'GAA': ['E', 'four', 0], 'GAG': ['E', 'four', 0],
            'TAA': ['E', 'four', 0], 'TAG': ['E', 'four', 0],
            'TGA': ['*', 'one', 0]}

        # Peritrich (table 30) genetic code codon table, with 6-fold
        # degenerate codons split into four-fold and two-fold groups.
        # Note that this also splits four-fold degenerate codons that OUGHT to
        # be in "different" functional categories (e.g. GAA =/= TAA)
        peritrich_no6fold = {**universal_no6fold,
            'TAA': ['E_', 'two', 0], 'TAG': ['E_', 'two', 0],
            'TGA': ['*', 'one', 0]}

        cdnTableDict = {1:[universal_no6fold,universal_6fold],
            4:[blepharisma_no6fold, blepharisma_6fold],
            6:[ciliate_no6fold,ciliate_6fold],
            10:[euplotes_no6fold,euplotes_6fold],
            29:[mesodinium_no6fold,mesodinium_6fold],
            30:[peritrich_no6fold,peritrich_6fold],
            'chilodonella':[chilo_no6fold,chilo_6fold],
            'chilo':[chilo_no6fold,chilo_6fold]}
        return cdnTableDict[gCode]

    @staticmethod
    def mapCdns(seq, cdnTable):
        """Add the codon counts of ``seq`` to ``cdnTable`` (in place).

        The sequence is read in frame from its first base; a trailing partial
        codon is ignored.  Codons absent from the table (e.g. containing N)
        are tallied as ambiguous.  The sequence is upper-cased first, so
        lower-case input is counted rather than treated as ambiguous.

        Returns ``(cdnTable, ambiguous_count)`` when the table keeps six-fold
        families whole (``cdnTable['TCC'][1] == 'six'``), otherwise just
        ``cdnTable`` -- callers rely on this difference.
        """
        seq = str(seq).upper()
        in_frame_len = len(seq) - len(seq) % 3
        amb_cdn = 0
        for start in range(0, in_frame_len, 3):
            try:
                cdnTable[seq[start:start + 3]][-1] += 1
            except KeyError:
                # Only an unknown codon is "ambiguous"; anything else is a
                # real error and should surface.
                amb_cdn += 1
        if cdnTable['TCC'][1] == 'six':
            return cdnTable, amb_cdn
        return cdnTable
class GCeval():
    """
    Returns %GC values from DNA sequences of various types.

    Every function delegates the actual calculation to ``GC`` (imported
    elsewhere in this file) and rounds the result to 4 decimals.  The
    functions are used through the class (``GCeval.gc1(seq)``), so they are
    declared as static methods.
    """

    @staticmethod
    def gcTotal(seq):
        # This function returns global GC content
        return round(GC(seq), 4)

    @staticmethod
    def gc1(seq):
        # This function returns the GC content of the first position of a
        # codon (every third base starting at index 0).
        return round(GC(seq[0::3]), 4)

    @staticmethod
    def gc2(seq):
        # This function returns the GC content of the second position of a
        # codon, taken from complete codons only.
        # The original stop index, len(seq) - (len(seq[1:]) % 3), fell short
        # for in-frame sequences and dropped the last codon (for a 6-base
        # sequence only index 1 was used, never index 4).
        in_frame_len = len(seq) - len(seq) % 3
        return round(GC(seq[1:in_frame_len:3]), 4)

    @staticmethod
    def gc3(seq):
        # This function returns the GC content of the third position of a
        # codon, taken from complete codons only (same stop-index fix as gc2).
        in_frame_len = len(seq) - len(seq) % 3
        return round(GC(seq[2:in_frame_len:3]), 4)

    @staticmethod
    def gc3_4F(cdnTbl):
        # This function returns the third-position GC content of degenerate
        # codons, weighted by how often each codon was counted.
        # The substring test skips both the 'one' and the 'none' degeneracy
        # labels ('none' contains 'one'), i.e. non-degenerate codons and the
        # stop/missing placeholders of the universal tables.
        FrFold = round(GC(''.join([k[-1]*v[-1] for k, v in cdnTbl.items() if
            'one' not in v[1]])), 4)
        return FrFold
class SeqInfo(object):
    """
    Provides a means to harbor the data for each individual contig/gene in a
    given fasta file.

    This includes GC content (various types), Effective Number of codons
    (ENc; again various calculations), Relative Synonymous Codon Usage (RSCU).

    The statistics are filled in by separate calls that must run in this
    order: ``countCodons`` -> ``GCstats`` -> ``ENcStats`` (``GCstats`` needs
    the codon counts, ``ENcStats`` needs ``gc3`` and the codon counts).
    """
    def __init__(self,seq,gcode='universal'):
        self.ntd = str(seq)
        self.gcode, self.transTable = GenUtil.convertGenCode(gcode)
        # Dictionary of the GC-related functions/calculations; each key
        # becomes an attribute of the same name in GCstats().
        self.gcFuncs = {'gcOverall':GCeval.gcTotal,'gc1':GCeval.gc1,'gc2':GCeval.gc2,'gc3':GCeval.gc3}

    def countCodons(self):
        # Stores the different codon tables and updates their codon counts.
        # Index 1 is the table with six-fold families kept whole (mapCdns
        # also returns the ambiguous-codon count for it); index 0 is the
        # table with six-fold families split.
        cdnTbls = GenUtil.getCDNtable(self.transTable)
        self.cdnCounts_6F,self.amb_cdn = GenUtil.mapCdns(self.ntd, cdnTbls[1])
        self.cdnCounts_No6F = GenUtil.mapCdns(self.ntd, cdnTbls[0])

    def ENcStats(self):
        # Stores the various Effective Number of Codons calculations in the class
        self.expENc = CalcCUB.expWrightENc(self.gc3)
        self.obsENc_6F = CalcCUB.calcWrightENc(self.cdnCounts_6F)
        self.obsENc_No6F = CalcCUB.calcWrightENc(self.cdnCounts_No6F)
        self.SunENc_6F = CalcCUB.SunEq5(self.cdnCounts_6F)
        self.SunENc_No6F = CalcCUB.SunEq5(self.cdnCounts_No6F)

    def GCstats(self):
        # Stores the various GC-stats in the class
        for k, v in self.gcFuncs.items():
            setattr(self,k,v(self.ntd))
        self.gc4F = GCeval.gc3_4F(self.cdnCounts_No6F)

    def RSCUstats(self):
        # NOTE(review): the RSCU function used elsewhere in this file is
        # CalcCUB.calcRCSU; confirm that CalcCUB.RSCU exists before calling
        # this method.
        self.rscu_No6Fold = CalcCUB.RSCU(self.cdnCounts_No6F)
        self.rscu_6Fold = CalcCUB.RSCU(self.cdnCounts_6F)


def prepFolders(outName):
    """Create the output folder and its 'Plots' and 'SpreadSheets' subfolders.

    Existing folders are left untouched and missing parent folders are
    created as well.
    """
    for folder in (outName, os.path.join(outName, 'Plots'),
                   os.path.join(outName, 'SpreadSheets')):
        os.makedirs(folder, exist_ok=True)


def CalcRefFasta(fasta, gCode):
    """Compute per-sequence statistics and a pooled RSCU table for a FASTA.

    Returns ``(seqDB, RSCU)``: ``seqDB`` maps each record description to its
    populated ``SeqInfo``; ``RSCU`` is ``CalcCUB.calcRCSU`` applied to the
    six-fold codon counts summed over all sequences.
    """
    seqDB = {i.description:SeqInfo(i.seq, gCode) for i in SeqIO.parse(fasta,'fasta')}
    GenCDNtable = {}
    for info in seqDB.values():
        info.countCodons()
        info.GCstats()
        info.ENcStats()
        # Pool this sequence's counts into the file-wide table.  Distinct
        # loop names replace the original's re-use of k, v in both loops.
        for codon, entry in info.cdnCounts_6F.items():
            if not codon.isalpha():
                # The original fell through to the update branch here and
                # raised KeyError; such keys are now skipped.
                continue
            if codon not in GenCDNtable:
                GenCDNtable[codon] = [entry[0], entry[-1]]
            else:
                GenCDNtable[codon][-1] += entry[-1]
    RSCU = CalcCUB.calcRCSU(GenCDNtable)
    return seqDB, RSCU
def WriteWrightOut(seqData, outName, comp):
    """Write the per-sequence GC/ENc table as a TSV spreadsheet.

    Parameters
    ----------
    seqData : dict
        ``{sequence_id: record}``; each record provides ``amb_cdn``, the GC
        attributes (``gcOverall``, ``gc1``, ``gc2``, ``gc3``, ``gc4F``) and
        the ENc attributes (``expENc``, ``obsENc_6F``, ``obsENc_No6F``,
        ``SunENc_6F``, ``SunENc_No6F``).
    outName : str
        Output folder; the file goes to
        ``<outName>/SpreadSheets/<last path part of outName><suffix>``.
    comp : bool
        ``False`` writes ``.ENc.Raw.tsv``; anything else writes
        ``.CompTrans.ENc.Raw.tsv``.  The contents are laid out identically.
    """
    # The two branches of the original differed only in this suffix.
    if comp == False:
        suffix = '.ENc.Raw.tsv'
    else:
        suffix = '.CompTrans.ENc.Raw.tsv'

    with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+suffix,'w+') as w:
        w.write('SequenceID\tAmbiguousCodons\tGC-Overall\tGC1\tGC2\tGC3\t'
            'GC3-Degen\tExpWrightENc\tObsWrightENc_6Fold\tObsWrightENc_No6Fold\t'
            'ObsWeightedENc_6Fold\tObsWeightedENc_No6Fold\n')
        for k, v in seqData.items():
            name = [k]
            gcs = [str(v.gcOverall),str(v.gc1),str(v.gc2),str(v.gc3),str(v.gc4F)]
            ENc = [str(v.expENc),str(v.obsENc_6F),str(v.obsENc_No6F),
                str(v.SunENc_6F),str(v.SunENc_No6F)]
            w.write('\t'.join(name+[str(v.amb_cdn)]+gcs+ENc)+'\n')


def getCompFasta(fasta, gCode):
    """Write the sequences whose length is a multiple of three to a new FASTA.

    The output path is ``fasta`` with ``'.fasta'`` replaced by
    ``'.Comp.fasta'``; when the input name contains no ``'.fasta'`` the
    suffix ``'.Comp.fasta'`` is appended instead, so the input file is never
    opened for writing.  Returns the output path.
    """
    print(fasta)
    stopCDNs = {'1':['TAA','TAG','TGA'], '4':['TAA','TAG'], '6':['TGA'], '10':['TAA','TAG'],
        '29':['TGA'], '30':['TGA'], 'universal':['TAA','TAG','TGA'], 'blepharisma':['TAA','TAG'],
        'ciliate':['TGA'],'euplotes':['TAA','TAG'], 'mesodinium':['TGA'], 'peritrich':['TGA'],
        'chilo':['TAA']}
    # Look up with the same lower-cased key that is used for the membership
    # test; the original indexed with the raw value and raised KeyError for
    # e.g. 'Ciliate'.  Unknown codes fall back to the universal stops.
    # `stops` is only needed by the (currently disabled) start/stop filters.
    stops = stopCDNs.get(str(gCode).lower(), stopCDNs['1'])

    compFasta = fasta.replace('.fasta','.Comp.fasta')
    if compFasta == fasta:
        # No '.fasta' in the name: writing to the same path would truncate
        # the input before it is read.
        compFasta = fasta + '.Comp.fasta'

    with open(compFasta,'w+') as w:
        for i in SeqIO.parse(fasta,'fasta'):
            #if str(i.seq).upper().startswith('ATG') and str(i.seq).upper()[-3:] in stops:
            #if str(i.seq).upper()[-3:] in stops:
            if len(i.seq) % 3 == 0:
                w.write('>'+i.description+'\n'+str(i.seq)+'\n')

    return compFasta
def WriteNullENcOut(outName):
    """Write the GC3/ENc null table from ``CalcCUB.nullENcGC3()`` as TSV.

    The file is ``<outName>/SpreadSheets/<last path part of outName>.ENc.Null.tsv``
    with a ``GC3<TAB>ENc`` header followed by the returned lines.
    """
    with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.ENc.Null.tsv','w+') as w:
        w.write('GC3\tENc\n')
        w.write('\n'.join(CalcCUB.nullENcGC3()))


def WriteRSCUtbl(RSCUtbl, outName):
    """Write an RSCU table as TSV.

    ``RSCUtbl`` maps codon -> list of strings (amino acid, RSCU value).  The
    file is ``<outName>/SpreadSheets/<last path part of outName>.RSCU.tsv``.
    """
    with open(outName+'/SpreadSheets/'+outName.split('/')[-1]+'.RSCU.tsv','w+') as w:
        w.write('Codon\tAmino Acid\tRSCU\n')
        for k,v in RSCUtbl.items():
            w.write(k+'\t'+'\t'.join(v)+'\n')


if __name__ == "__main__":
    # Local import: only the command-line entry point needs it.
    import shutil

    if len(sys.argv) < 2:
        print('\nUsage:\n')
        print('python CUB.py MyNtds.fasta MyTaxon genetic_code\n')
        print('\nGenetic Codes:\n')
        gcd = ['1', '4', '6', '10', '29', '30', 'universal', 'blepharisma',
            'ciliate','euplotes', 'mesodinium', 'peritrich','chilo']
        print('\n'.join(gcd)+'\n')
        sys.exit()
    fasta = sys.argv[1]
    try:
        outName = sys.argv[2]
    except IndexError:
        print('Missing an output name. Include one, then run again!')
        sys.exit()
    try:
        gCode = sys.argv[3]
    except IndexError:
        gCode = 'universal'
    compFasta = getCompFasta(fasta, gCode)
    prepFolders(outName)
    fastaDataRaw, RSCUtbl = CalcRefFasta(fasta, gCode)
    # The RSCU table that gets written is the one from the length-filtered
    # ("Comp") FASTA; it replaces the table computed from the raw FASTA.
    fastaDataComp, RSCUtbl = CalcRefFasta(compFasta, gCode)
    WriteWrightOut(fastaDataRaw, outName, comp=False)
    WriteWrightOut(fastaDataComp, outName, comp=True)
    WriteNullENcOut(outName)
    WriteRSCUtbl(RSCUtbl, outName)
    # Copy the input and move the filtered FASTA into the output folder
    # without going through a shell, so paths containing spaces or shell
    # metacharacters are handled.  Explicit destination file names make
    # re-runs overwrite earlier results, as `cp`/`mv` did.
    for src, transfer in ((fasta, shutil.copy), (compFasta, shutil.move)):
        dest = os.path.join(outName, os.path.basename(src))
        if os.path.abspath(src) != os.path.abspath(dest):
            transfer(src, dest)
Tcl/8.6.10-GCCcore-10.2.0 => Tcl/8.6.9-GCCcore-8.3.0 + 6) XZ/5.2.5-GCCcore-10.2.0 => XZ/5.2.4-GCCcore-8.3.0 + 7) binutils/2.35-GCCcore-10.2.0 => binutils/2.32-GCCcore-8.3.0 + 8) bzip2/1.0.8-GCCcore-10.2.0 => bzip2/1.0.8-GCCcore-8.3.0 + 9) libffi/3.3-GCCcore-10.2.0 => libffi/3.2.1-GCCcore-8.3.0 + 10) libreadline/8.0-GCCcore-10.2.0 => libreadline/8.0-GCCcore-8.3.0 + 11) ncurses/6.2-GCCcore-10.2.0 => ncurses/6.1-GCCcore-8.3.0 + 12) zlib/1.2.11-GCCcore-10.2.0 => zlib/1.2.11-GCCcore-8.3.0 + + +The following have been reloaded with a version change: + 1) GCC/8.3.0 => GCC/10.3.0 + 2) GCCcore/8.3.0 => GCCcore/10.3.0 + 3) binutils/2.32-GCCcore-8.3.0 => binutils/2.36.1-GCCcore-10.3.0 + 4) bzip2/1.0.8-GCCcore-8.3.0 => bzip2/1.0.8-GCCcore-10.3.0 + 5) zlib/1.2.11-GCCcore-8.3.0 => zlib/1.2.11-GCCcore-10.3.0 + + + +Prepping Sr_rh_Emac_assembledTranscripts.fasta + + +There are 60195 contigs > 200 in Sr_rh_Emac_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Tx01_assembledTranscripts.fasta + + +There are 142089 contigs > 200 in Sr_rh_Tx01_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn13_assembledTranscripts.fasta + + +There are 150535 contigs > 200 in Sr_rh_Nn13_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Sspa_assembledTranscripts.fasta + + +There are 96071 contigs > 200 in Sr_rh_Sspa_assembledTranscripts.fasta + + +Look for 
/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn06_assembledTranscripts.fasta + + +There are 145214 contigs > 200 in Sr_rh_Nn06_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Atps_assembledTranscripts.fasta + + +There are 10253 contigs > 200 in Sr_rh_Atps_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv01_assembledTranscripts.fasta + + +There are 208216 contigs > 200 in Sr_rh_Bv01_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv05_assembledTranscripts.fasta + + +There are 218559 contigs > 200 in Sr_rh_Bv05_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Gl03_assembledTranscripts.fasta + + +There are 251483 contigs > 200 in Sr_rh_Gl03_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv04_assembledTranscripts.fasta + + +There are 224949 contigs > 200 in 
Sr_rh_Bv04_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Gl01_assembledTranscripts.fasta + + +There are 8191 contigs > 200 in Sr_rh_Gl01_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Hhir_assembledTranscripts.fasta + + +There are 55856 contigs > 200 in Sr_rh_Hhir_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn02_assembledTranscripts.fasta + + +There are 138826 contigs > 200 in Sr_rh_Nn02_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Slin_assembledTranscripts.fasta + + +There are 37070 contigs > 200 in Sr_rh_Slin_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Emar_assembledTranscripts.fasta + + +There are 43809 contigs > 200 in Sr_rh_Emar_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Calb_assembledTranscripts.fasta + + +There are 75768 
contigs > 200 in Sr_rh_Calb_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Rsp1_assembledTranscripts.fasta + + +There are 167000 contigs > 200 in Sr_rh_Rsp1_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn09_assembledTranscripts.fasta + + +There are 221270 contigs > 200 in Sr_rh_Nn09_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn10_assembledTranscripts.fasta + + +There are 140733 contigs > 200 in Sr_rh_Nn10_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn01_assembledTranscripts.fasta + + +There are 172554 contigs > 200 in Sr_rh_Nn01_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn12_assembledTranscripts.fasta + + +There are 136924 contigs > 200 in Sr_rh_Nn12_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv03_assembledTranscripts.fasta 
+ + +There are 228384 contigs > 200 in Sr_rh_Bv03_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Hind_assembledTranscripts.fasta + + +There are 65043 contigs > 200 in Sr_rh_Hind_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn04_assembledTranscripts.fasta + + +There are 312525 contigs > 200 in Sr_rh_Nn04_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn08_assembledTranscripts.fasta + + +There are 198852 contigs > 200 in Sr_rh_Nn08_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn07_assembledTranscripts.fasta + + +There are 172565 contigs > 200 in Sr_rh_Nn07_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn11_assembledTranscripts.fasta + + +There are 129060 contigs > 200 in Sr_rh_Nn11_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping 
Sr_rh_Nn14_assembledTranscripts.fasta + + +There are 152790 contigs > 200 in Sr_rh_Nn14_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Gsp1_assembledTranscripts.fasta + + +There are 62212 contigs > 200 in Sr_rh_Gsp1_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Halb_assembledTranscripts.fasta + + +There are 100948 contigs > 200 in Sr_rh_Halb_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv06_assembledTranscripts.fasta + + +There are 143978 contigs > 200 in Sr_rh_Bv06_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Gl02_assembledTranscripts.fasta + + +There are 261586 contigs > 200 in Sr_rh_Gl02_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn05_assembledTranscripts.fasta + + +There are 128153 contigs > 200 in Sr_rh_Nn05_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 
2b_remove_Bact.py) + + + + +Prepping Sr_rh_Nn03_assembledTranscripts.fasta + + +There are 131998 contigs > 200 in Sr_rh_Nn03_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Esca_assembledTranscripts.fasta + + +There are 99293 contigs > 200 in Sr_rh_Esca_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Usac_assembledTranscripts.fasta + + +There are 133441 contigs > 200 in Sr_rh_Usac_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Bv02_assembledTranscripts.fasta + + +There are 236067 contigs > 200 in Sr_rh_Bv02_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + + + +Prepping Sr_rh_Gspa_assembledTranscripts.fasta + + +There are 22017 contigs > 200 in Sr_rh_Gspa_assembledTranscripts.fasta + + +Look for /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa.200bp.fasta + + +Next Script is: 2_Auto_rRNA_BvE.py +(Alternatively 2a_remove_rRNA.py followed by 2b_remove_Bact.py) + + +MERGE following files +CREATE a dictionnary of sequences + +Error in cross-plate contamination assessment: the ten-digit code Sr_rh_Gspa is not found in the conspecific names file. Please check that this file is correct and try again. 
+ +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn02.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn02.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.212s] +Masking queries... [0.336s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.137s] +Allocating buffers... [0s] +Loading reference sequences... [1.097s] +Masking reference... [0.717s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.306s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.233s] +Building query seed array... [0.09s] +Computing hash join... [0.113s] +Building seed filter... [0.008s] +Searching alignments... [0.148s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.24s] +Building query seed array... [0.095s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.134s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.099s] +Computing hash join... 
[0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.144s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.085s] +Computing hash join... [0.095s] +Building seed filter... [0.008s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.214s] +Building query seed array... [0.085s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.239s] +Building query seed array... [0.102s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.13s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.255s] +Building query seed array... [0.105s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.127s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.092s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.126s] +Deallocating buffers... [0.057s] +Computing alignments... [1.45s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.021s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.975s +Reported 13195 pairwise alignments, 13268 HSPs. +13195 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.247s] +Masking queries... [0.318s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.128s] +Allocating buffers... [0s] +Loading reference sequences... [0.978s] +Masking reference... [0.649s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.27s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.218s] +Building query seed array... [0.101s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.324s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.104s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... [0.278s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.097s] +Computing hash join... [0.089s] +Building seed filter... [0.008s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.086s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... 
[0.318s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.204s] +Building query seed array... [0.091s] +Computing hash join... [0.087s] +Building seed filter... [0.008s] +Searching alignments... [0.234s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.095s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... [0.205s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.101s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... [0.219s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.2s] +Building query seed array... [0.09s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.203s] +Deallocating buffers... [0.052s] +Computing alignments... [2.586s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.677s +Reported 27299 pairwise alignments, 27536 HSPs. +27299 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/Sr_rh_Nn02_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn02_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Gspa.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Gspa.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.299s] +Masking queries... [0.104s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.046s] +Allocating buffers... [0s] +Loading reference sequences... [1.097s] +Masking reference... [0.693s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.327s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.235s] +Building query seed array... [0.037s] +Computing hash join... [0.079s] +Building seed filter... [0.005s] +Searching alignments... [0.061s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.247s] +Building query seed array... [0.037s] +Computing hash join... [0.065s] +Building seed filter... [0.004s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.255s] +Building query seed array... [0.042s] +Computing hash join... [0.063s] +Building seed filter... [0.005s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.037s] +Computing hash join... [0.062s] +Building seed filter... [0.004s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.202s] +Building query seed array... [0.035s] +Computing hash join... [0.06s] +Building seed filter... [0.004s] +Searching alignments... [0.054s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.235s] +Building query seed array... [0.04s] +Computing hash join... [0.057s] +Building seed filter... [0.004s] +Searching alignments... [0.052s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.041s] +Computing hash join... [0.061s] +Building seed filter... [0.004s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.203s] +Building query seed array... [0.036s] +Computing hash join... [0.06s] +Building seed filter... [0.004s] +Searching alignments... [0.052s] +Deallocating buffers... [0.055s] +Computing alignments... [0.628s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.003s] +Deallocating queries... [0.003s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 6.484s +Reported 4770 pairwise alignments, 4832 HSPs. +4770 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/BvE +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.299s] +Masking queries... [0.154s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.048s] +Allocating buffers... [0s] +Loading reference sequences... [0.99s] +Masking reference... [0.788s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.273s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.22s] +Building query seed array... [0.054s] +Computing hash join... [0.067s] +Building seed filter... [0.004s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.034s] +Computing hash join... [0.058s] +Building seed filter... [0.004s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.24s] +Building query seed array... [0.044s] +Computing hash join... [0.057s] +Building seed filter... [0.005s] +Searching alignments... [0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.198s] +Building query seed array... [0.039s] +Computing hash join... [0.058s] +Building seed filter... [0.004s] +Searching alignments... [0.065s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.191s] +Building query seed array... [0.032s] +Computing hash join... [0.054s] +Building seed filter... [0.004s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.039s] +Computing hash join... [0.052s] +Building seed filter... [0.005s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.045s] +Computing hash join... [0.06s] +Building seed filter... [0.005s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.19s] +Building query seed array... [0.03s] +Computing hash join... [0.052s] +Building seed filter... [0.004s] +Searching alignments... [0.06s] +Deallocating buffers... [0.049s] +Computing alignments... [0.79s] +Deallocating reference... [0.016s] +Loading reference sequences... [0s] +Deallocating buffers... [0.003s] +Deallocating queries... [0.003s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 6.567s +Reported 9142 pairwise alignments, 9308 HSPs. +9142 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/Sr_rh_Gspa_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Gspa_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn06.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn06.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.259s] +Masking queries... [0.362s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.129s] +Allocating buffers... [0s] +Loading reference sequences... [1.101s] +Masking reference... [0.69s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.299s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.23s] +Building query seed array... [0.104s] +Computing hash join... [0.111s] +Building seed filter... [0.007s] +Searching alignments... [0.145s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.255s] +Building query seed array... [0.1s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.144s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.103s] +Computing hash join... [0.1s] +Building seed filter... [0.006s] +Searching alignments... [0.141s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.214s] +Building query seed array... [0.092s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.141s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.099s] +Computing hash join... [0.098s] +Building seed filter... [0.008s] +Searching alignments... [0.13s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.244s] +Building query seed array... [0.109s] +Computing hash join... [0.101s] +Building seed filter... [0.008s] +Searching alignments... [0.127s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.106s] +Computing hash join... [0.097s] +Building seed filter... [0.008s] +Searching alignments... [0.137s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.102s] +Computing hash join... [0.1s] +Building seed filter... [0.009s] +Searching alignments... [0.13s] +Deallocating buffers... [0.058s] +Computing alignments... [1.366s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.021s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.048s +Reported 13937 pairwise alignments, 14011 HSPs. +13937 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.265s] +Masking queries... [0.333s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.133s] +Allocating buffers... [0s] +Loading reference sequences... [1.004s] +Masking reference... [0.644s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.272s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.22s] +Building query seed array... [0.107s] +Computing hash join... [0.129s] +Building seed filter... [0.007s] +Searching alignments... [0.357s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.231s] +Building query seed array... [0.099s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.323s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.103s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.218s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.195s] +Building query seed array... [0.087s] +Computing hash join... [0.094s] +Building seed filter... [0.008s] +Searching alignments... [0.312s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.198s] +Building query seed array... [0.089s] +Computing hash join... [0.092s] +Building seed filter... [0.008s] +Searching alignments... [0.251s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.099s] +Computing hash join... [0.092s] +Building seed filter... [0.009s] +Searching alignments... [0.235s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.238s] +Building query seed array... [0.107s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.234s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.091s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.226s] +Deallocating buffers... [0.051s] +Computing alignments... [2.637s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.998s +Reported 29695 pairwise alignments, 29910 HSPs. +29695 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/Sr_rh_Nn06_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn06_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv02.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv02.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.833s] +Masking queries... [0.55s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.161s] +Allocating buffers... [0s] +Loading reference sequences... [1.098s] +Masking reference... [0.696s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.298s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.233s] +Building query seed array... [0.127s] +Computing hash join... [0.113s] +Building seed filter... [0.006s] +Searching alignments... [0.182s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.241s] +Building query seed array... [0.11s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.18s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.13s] +Computing hash join... [0.101s] +Building seed filter... [0.007s] +Searching alignments... [0.173s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.097s] +Computing hash join... [0.095s] +Building seed filter... [0.007s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.101s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.154s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.114s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.158s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.115s] +Computing hash join... [0.1s] +Building seed filter... [0.007s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.216s] +Building query seed array... [0.1s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.157s] +Deallocating buffers... [0.058s] +Computing alignments... [1.721s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.493s +Reported 18788 pairwise alignments, 18972 HSPs. +18788 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.836s] +Masking queries... [0.465s] +Building query seed set... [0.049s] +Algorithm: Double-indexed +Building query histograms... [0.155s] +Allocating buffers... [0s] +Loading reference sequences... [0.978s] +Masking reference... [0.644s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.282s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.122s] +Computing hash join... [0.106s] +Building seed filter... [0.008s] +Searching alignments... [0.399s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.112s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.343s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.114s] +Computing hash join... [0.09s] +Building seed filter... [0.009s] +Searching alignments... [0.3s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.199s] +Building query seed array... [0.108s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.348s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.202s] +Building query seed array... [0.1s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... [0.293s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.111s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... [0.252s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.13s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.245s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.2s] +Building query seed array... [0.101s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... [0.246s] +Deallocating buffers... [0.051s] +Computing alignments... [3.276s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.017s] +Deallocating queries... [0.023s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.717s +Reported 38037 pairwise alignments, 38537 HSPs. +38037 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/Sr_rh_Bv02_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv02_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Hind.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Hind.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.878s] +Masking queries... [0.263s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.101s] +Allocating buffers... [0s] +Loading reference sequences... [1.102s] +Masking reference... [0.695s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.322s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.092s] +Computing hash join... [0.118s] +Building seed filter... [0.005s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.25s] +Building query seed array... [0.08s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.079s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.107s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.23s] +Building query seed array... [0.077s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.213s] +Building query seed array... [0.07s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.103s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.243s] +Building query seed array... [0.08s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.101s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.246s] +Building query seed array... [0.082s] +Computing hash join... [0.09s] +Building seed filter... [0.006s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.074s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.092s] +Deallocating buffers... [0.059s] +Computing alignments... [0.886s] +Deallocating reference... [0.028s] +Loading reference sequences... [0s] +Deallocating buffers... [0.011s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.616s +Reported 7467 pairwise alignments, 7480 HSPs. +7467 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.888s] +Masking queries... [0.252s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.114s] +Allocating buffers... [0s] +Loading reference sequences... [0.998s] +Masking reference... [0.663s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.286s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.085s] +Computing hash join... [0.118s] +Building seed filter... [0.006s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.082s] +Computing hash join... [0.09s] +Building seed filter... [0.005s] +Searching alignments... [0.144s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.079s] +Computing hash join... [0.087s] +Building seed filter... [0.006s] +Searching alignments... [0.13s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.195s] +Building query seed array... [0.069s] +Computing hash join... [0.086s] +Building seed filter... [0.005s] +Searching alignments... [0.131s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.194s] +Building query seed array... [0.074s] +Computing hash join... [0.085s] +Building seed filter... [0.005s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.078s] +Computing hash join... [0.084s] +Building seed filter... [0.005s] +Searching alignments... [0.127s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.227s] +Building query seed array... [0.081s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.118s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.191s] +Building query seed array... [0.073s] +Computing hash join... [0.087s] +Building seed filter... [0.007s] +Searching alignments... [0.111s] +Deallocating buffers... [0.05s] +Computing alignments... [1.503s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.009s +Reported 14755 pairwise alignments, 14799 HSPs. +14755 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/Sr_rh_Hind_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Hind_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn11.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn11.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.123s] +Masking queries... [0.301s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.127s] +Allocating buffers... [0s] +Loading reference sequences... [1.09s] +Masking reference... [0.688s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.286s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.231s] +Building query seed array... [0.102s] +Computing hash join... [0.12s] +Building seed filter... [0.007s] +Searching alignments... [0.134s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.098s] +Computing hash join... [0.096s] +Building seed filter... [0.008s] +Searching alignments... [0.132s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.255s] +Building query seed array... [0.093s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.082s] +Computing hash join... [0.096s] +Building seed filter... [0.008s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.208s] +Building query seed array... [0.084s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.236s] +Building query seed array... [0.091s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.243s] +Building query seed array... [0.086s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.111s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.082s] +Computing hash join... [0.101s] +Building seed filter... [0.007s] +Searching alignments... [0.111s] +Deallocating buffers... [0.056s] +Computing alignments... [1.084s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.263s +Reported 11697 pairwise alignments, 11771 HSPs. +11697 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.128s] +Masking queries... [0.307s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.119s] +Allocating buffers... [0s] +Loading reference sequences... [0.991s] +Masking reference... [0.636s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.269s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.216s] +Building query seed array... [0.098s] +Computing hash join... [0.109s] +Building seed filter... [0.006s] +Searching alignments... [0.295s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.229s] +Building query seed array... [0.088s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.181s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.243s] +Building query seed array... [0.09s] +Computing hash join... [0.088s] +Building seed filter... [0.008s] +Searching alignments... 
[0.17s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.197s] +Building query seed array... [0.075s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.166s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.194s] +Building query seed array... [0.086s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.152s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.221s] +Building query seed array... [0.087s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.163s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.095s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.155s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.193s] +Building query seed array... [0.085s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.151s] +Deallocating buffers... [0.051s] +Computing alignments... [1.964s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.254s +Reported 25408 pairwise alignments, 25601 HSPs. +25408 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/Sr_rh_Nn11_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn11_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv03.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv03.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.786s] +Masking queries... [0.459s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.148s] +Allocating buffers... [0s] +Loading reference sequences... [1.108s] +Masking reference... [0.701s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.301s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.235s] +Building query seed array... [0.115s] +Computing hash join... [0.11s] +Building seed filter... [0.006s] +Searching alignments... [0.176s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.243s] +Building query seed array... [0.106s] +Computing hash join... [0.095s] +Building seed filter... [0.008s] +Searching alignments... [0.158s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.246s] +Building query seed array... [0.107s] +Computing hash join... [0.097s] +Building seed filter... [0.008s] +Searching alignments... [0.17s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.097s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.094s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.24s] +Building query seed array... [0.106s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.244s] +Building query seed array... [0.113s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.153s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.205s] +Building query seed array... [0.091s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.149s] +Deallocating buffers... [0.056s] +Computing alignments... [1.651s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.02s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.134s +Reported 18214 pairwise alignments, 18404 HSPs. +18214 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/BvE +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.797s] +Masking queries... [0.454s] +Building query seed set... [0.051s] +Algorithm: Double-indexed +Building query histograms... [0.149s] +Allocating buffers... [0s] +Loading reference sequences... [1.046s] +Masking reference... [0.659s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.279s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.227s] +Building query seed array... [0.107s] +Computing hash join... [0.108s] +Building seed filter... [0.009s] +Searching alignments... [0.363s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.228s] +Building query seed array... [0.108s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.313s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.107s] +Computing hash join... [0.09s] +Building seed filter... [0.006s] +Searching alignments... [0.297s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.197s] +Building query seed array... [0.098s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.347s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.202s] +Building query seed array... [0.105s] +Computing hash join... [0.087s] +Building seed filter... [0.007s] +Searching alignments... [0.255s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.106s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... [0.253s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.107s] +Computing hash join... [0.088s] +Building seed filter... [0.009s] +Searching alignments... [0.229s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.098s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.233s] +Deallocating buffers... [0.05s] +Computing alignments... [3.164s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.417s +Reported 36335 pairwise alignments, 36838 HSPs. +36335 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/Sr_rh_Bv03_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv03_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Gsp1.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Gsp1.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.606s] +Masking queries... [0.179s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.071s] +Allocating buffers... [0s] +Loading reference sequences... [1.127s] +Masking reference... [0.691s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.311s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.24s] +Building query seed array... [0.068s] +Computing hash join... [0.118s] +Building seed filter... [0.005s] +Searching alignments... [0.092s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.239s] +Building query seed array... [0.059s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.055s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.08s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.053s] +Computing hash join... [0.082s] +Building seed filter... [0.006s] +Searching alignments... [0.081s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.054s] +Computing hash join... [0.083s] +Building seed filter... [0.005s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.233s] +Building query seed array... [0.057s] +Computing hash join... [0.082s] +Building seed filter... [0.006s] +Searching alignments... [0.078s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.246s] +Building query seed array... [0.059s] +Computing hash join... [0.081s] +Building seed filter... [0.006s] +Searching alignments... [0.073s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.203s] +Building query seed array... [0.053s] +Computing hash join... [0.086s] +Building seed filter... [0.006s] +Searching alignments... [0.073s] +Deallocating buffers... [0.057s] +Computing alignments... [0.674s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.007s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.525s +Reported 6372 pairwise alignments, 6381 HSPs. +6372 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.615s] +Masking queries... [0.175s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.067s] +Allocating buffers... [0s] +Loading reference sequences... [1.021s] +Masking reference... [0.638s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.27s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.216s] +Building query seed array... [0.067s] +Computing hash join... [0.111s] +Building seed filter... [0.005s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.059s] +Computing hash join... [0.084s] +Building seed filter... [0.006s] +Searching alignments... [0.124s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.069s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... 
[0.109s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.194s] +Building query seed array... [0.054s] +Computing hash join... [0.084s] +Building seed filter... [0.005s] +Searching alignments... [0.111s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.196s] +Building query seed array... [0.052s] +Computing hash join... [0.074s] +Building seed filter... [0.006s] +Searching alignments... [0.093s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.221s] +Building query seed array... [0.054s] +Computing hash join... [0.078s] +Building seed filter... [0.006s] +Searching alignments... [0.098s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.228s] +Building query seed array... [0.053s] +Computing hash join... [0.077s] +Building seed filter... [0.006s] +Searching alignments... [0.091s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.19s] +Building query seed array... [0.052s] +Computing hash join... [0.079s] +Building seed filter... [0.006s] +Searching alignments... [0.096s] +Deallocating buffers... [0.049s] +Computing alignments... [1.173s] +Deallocating reference... [0.017s] +Loading reference sequences... [0s] +Deallocating buffers... [0.008s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.856s +Reported 13026 pairwise alignments, 13050 HSPs. +13026 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/Sr_rh_Gsp1_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Gsp1_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv06.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv06.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.977s] +Masking queries... [0.29s] +Building query seed set... [0.051s] +Algorithm: Double-indexed +Building query histograms... [0.086s] +Allocating buffers... [0s] +Loading reference sequences... [1.09s] +Masking reference... [0.691s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.318s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.072s] +Computing hash join... [0.134s] +Building seed filter... [0.006s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.242s] +Building query seed array... [0.063s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.105s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.254s] +Building query seed array... [0.067s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.221s] +Building query seed array... [0.067s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.097s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.213s] +Building query seed array... [0.059s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.089s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.251s] +Building query seed array... [0.066s] +Computing hash join... [0.09s] +Building seed filter... [0.005s] +Searching alignments... [0.085s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.243s] +Building query seed array... [0.072s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.206s] +Building query seed array... [0.061s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.089s] +Deallocating buffers... [0.059s] +Computing alignments... [0.916s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.558s +Reported 11454 pairwise alignments, 11535 HSPs. +11454 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0.001s] +Opening the output file... [0s] +Loading query sequences... [0.99s] +Masking queries... [0.258s] +Building query seed set... [0.048s] +Algorithm: Double-indexed +Building query histograms... [0.088s] +Allocating buffers... [0s] +Loading reference sequences... [0.994s] +Masking reference... [0.645s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.273s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.071s] +Computing hash join... [0.117s] +Building seed filter... [0.006s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.065s] +Computing hash join... [0.093s] +Building seed filter... [0.005s] +Searching alignments... [0.171s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.069s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... 
[0.148s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.198s] +Building query seed array... [0.062s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.179s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.201s] +Building query seed array... [0.06s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.136s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.229s] +Building query seed array... [0.068s] +Computing hash join... [0.084s] +Building seed filter... [0.005s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.244s] +Building query seed array... [0.073s] +Computing hash join... [0.09s] +Building seed filter... [0.006s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.054s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.13s] +Deallocating buffers... [0.05s] +Computing alignments... [1.598s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.277s +Reported 22234 pairwise alignments, 22429 HSPs. +22234 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/Sr_rh_Bv06_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv06_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Gl01.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Gl01.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.043s] +Masking queries... [0.023s] +Building query seed set... [0.029s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [1.086s] +Masking reference... [0.71s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.304s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.243s] +Building query seed array... [0.012s] +Computing hash join... [0.076s] +Building seed filter... [0.004s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.238s] +Building query seed array... [0.011s] +Computing hash join... [0.063s] +Building seed filter... [0.004s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.01s] +Computing hash join... [0.055s] +Building seed filter... [0.004s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.205s] +Building query seed array... [0.006s] +Computing hash join... [0.051s] +Building seed filter... [0.004s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.208s] +Building query seed array... [0.009s] +Computing hash join... [0.057s] +Building seed filter... [0.004s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.234s] +Building query seed array... [0.007s] +Computing hash join... [0.055s] +Building seed filter... [0.004s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.011s] +Computing hash join... [0.054s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.206s] +Building query seed array... [0.007s] +Computing hash join... [0.057s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Deallocating buffers... [0.057s] +Computing alignments... [0.126s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 4.935s +Reported 901 pairwise alignments, 903 HSPs. +901 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.026s] +Building query seed set... [0.029s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [1.012s] +Masking reference... [0.627s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.288s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.023s] +Computing hash join... [0.053s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.231s] +Building query seed array... [0.018s] +Computing hash join... [0.049s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.015s] +Computing hash join... [0.048s] +Building seed filter... [0.004s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.202s] +Building query seed array... [0.015s] +Computing hash join... [0.053s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.2s] +Building query seed array... [0.018s] +Computing hash join... [0.05s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.016s] +Computing hash join... [0.049s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.228s] +Building query seed array... [0.015s] +Computing hash join... [0.05s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.2s] +Building query seed array... [0.018s] +Computing hash join... [0.049s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Deallocating buffers... [0.052s] +Computing alignments... [0.168s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 4.846s +Reported 2013 pairwise alignments, 2017 HSPs. +2013 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/Sr_rh_Gl01_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Gl01_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Esca.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Esca.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.72s] +Masking queries... [0.231s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.074s] +Allocating buffers... [0s] +Loading reference sequences... [1.084s] +Masking reference... [0.687s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.303s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.227s] +Building query seed array... [0.07s] +Computing hash join... [0.127s] +Building seed filter... [0.006s] +Searching alignments... [0.092s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.24s] +Building query seed array... [0.057s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.087s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.054s] +Computing hash join... [0.081s] +Building seed filter... [0.005s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.053s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.086s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.209s] +Building query seed array... [0.053s] +Computing hash join... [0.077s] +Building seed filter... [0.004s] +Searching alignments... [0.073s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.235s] +Building query seed array... [0.053s] +Computing hash join... [0.074s] +Building seed filter... [0.005s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.262s] +Building query seed array... [0.07s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.075s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.055s] +Computing hash join... [0.074s] +Building seed filter... [0.005s] +Searching alignments... [0.073s] +Deallocating buffers... [0.057s] +Computing alignments... [0.777s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.008s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.742s +Reported 8279 pairwise alignments, 8300 HSPs. +8279 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.723s] +Masking queries... [0.215s] +Building query seed set... [0.043s] +Algorithm: Double-indexed +Building query histograms... [0.082s] +Allocating buffers... [0s] +Loading reference sequences... [1.018s] +Masking reference... [0.642s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.296s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.218s] +Building query seed array... [0.064s] +Computing hash join... [0.102s] +Building seed filter... [0.006s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.056s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.137s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.054s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.197s] +Building query seed array... [0.054s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.136s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.194s] +Building query seed array... [0.056s] +Computing hash join... [0.078s] +Building seed filter... [0.006s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.066s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.108s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.056s] +Computing hash join... [0.075s] +Building seed filter... [0.006s] +Searching alignments... [0.11s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.048s] +Computing hash join... [0.086s] +Building seed filter... [0.005s] +Searching alignments... [0.116s] +Deallocating buffers... [0.049s] +Computing alignments... [1.318s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.01s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.354s +Reported 17266 pairwise alignments, 17309 HSPs. +17266 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/Sr_rh_Esca_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Esca_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Calb.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Calb.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.792s] +Masking queries... [0.208s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.082s] +Allocating buffers... [0s] +Loading reference sequences... [1.109s] +Masking reference... [0.705s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.306s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.233s] +Building query seed array... [0.076s] +Computing hash join... [0.131s] +Building seed filter... [0.004s] +Searching alignments... [0.104s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.249s] +Building query seed array... [0.064s] +Computing hash join... [0.107s] +Building seed filter... [0.006s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.063s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.093s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.207s] +Building query seed array... [0.062s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.089s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.208s] +Building query seed array... [0.061s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.083s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.236s] +Building query seed array... [0.063s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.083s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.242s] +Building query seed array... [0.066s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.065s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.081s] +Deallocating buffers... [0.056s] +Computing alignments... [0.797s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.01s] +Deallocating queries... [0.007s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.114s +Reported 7719 pairwise alignments, 7754 HSPs. +7719 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.79s] +Masking queries... [0.213s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.086s] +Allocating buffers... [0s] +Loading reference sequences... [1.004s] +Masking reference... [0.641s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.265s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.216s] +Building query seed array... [0.079s] +Computing hash join... [0.118s] +Building seed filter... [0.005s] +Searching alignments... [0.142s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.066s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.137s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.229s] +Building query seed array... [0.062s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... 
[0.12s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.055s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.133s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.196s] +Building query seed array... [0.055s] +Computing hash join... [0.091s] +Building seed filter... [0.005s] +Searching alignments... [0.118s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.061s] +Computing hash join... [0.089s] +Building seed filter... [0.005s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.228s] +Building query seed array... [0.062s] +Computing hash join... [0.09s] +Building seed filter... [0.005s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.191s] +Building query seed array... [0.063s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.112s] +Deallocating buffers... [0.05s] +Computing alignments... [1.391s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0.01s] +Deallocating queries... [0.01s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.576s +Reported 15220 pairwise alignments, 15279 HSPs. +15220 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/Sr_rh_Calb_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Calb_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Emac.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Emac.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.739s] +Masking queries... [0.202s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.086s] +Allocating buffers... [0s] +Loading reference sequences... [1.109s] +Masking reference... [0.69s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.306s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.239s] +Building query seed array... [0.076s] +Computing hash join... [0.133s] +Building seed filter... [0.006s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.241s] +Building query seed array... [0.068s] +Computing hash join... [0.109s] +Building seed filter... [0.005s] +Searching alignments... [0.097s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.257s] +Building query seed array... [0.076s] +Computing hash join... [0.105s] +Building seed filter... [0.006s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.063s] +Computing hash join... [0.1s] +Building seed filter... [0.006s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.208s] +Building query seed array... [0.06s] +Computing hash join... [0.1s] +Building seed filter... [0.006s] +Searching alignments... [0.088s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.243s] +Building query seed array... [0.074s] +Computing hash join... [0.103s] +Building seed filter... [0.005s] +Searching alignments... [0.087s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.243s] +Building query seed array... [0.068s] +Computing hash join... [0.1s] +Building seed filter... [0.005s] +Searching alignments... [0.086s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.207s] +Building query seed array... [0.063s] +Computing hash join... [0.1s] +Building seed filter... [0.005s] +Searching alignments... [0.083s] +Deallocating buffers... [0.056s] +Computing alignments... [0.768s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.009s] +Deallocating queries... [0.007s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.096s +Reported 6356 pairwise alignments, 6370 HSPs. +6356 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.751s] +Masking queries... [0.21s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.091s] +Allocating buffers... [0s] +Loading reference sequences... [0.978s] +Masking reference... [0.64s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.277s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.225s] +Building query seed array... [0.08s] +Computing hash join... [0.145s] +Building seed filter... [0.005s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.065s] +Computing hash join... [0.101s] +Building seed filter... [0.006s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.074s] +Computing hash join... [0.101s] +Building seed filter... [0.006s] +Searching alignments... [0.111s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.196s] +Building query seed array... [0.064s] +Computing hash join... [0.098s] +Building seed filter... [0.005s] +Searching alignments... [0.11s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.194s] +Building query seed array... [0.06s] +Computing hash join... [0.096s] +Building seed filter... [0.005s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.217s] +Building query seed array... [0.072s] +Computing hash join... [0.097s] +Building seed filter... [0.005s] +Searching alignments... [0.097s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.229s] +Building query seed array... [0.073s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.189s] +Building query seed array... [0.062s] +Computing hash join... [0.099s] +Building seed filter... [0.005s] +Searching alignments... [0.099s] +Deallocating buffers... [0.05s] +Computing alignments... [1.187s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.011s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.282s +Reported 12686 pairwise alignments, 12732 HSPs. +12686 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/Sr_rh_Emac_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Emac_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn05.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn05.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.137s] +Masking queries... [0.299s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.121s] +Allocating buffers... [0s] +Loading reference sequences... [1.113s] +Masking reference... [0.703s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.297s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.233s] +Building query seed array... [0.092s] +Computing hash join... [0.113s] +Building seed filter... [0.008s] +Searching alignments... [0.13s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.243s] +Building query seed array... [0.089s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.124s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.087s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.082s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.126s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.08s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.241s] +Building query seed array... [0.088s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.091s] +Computing hash join... [0.097s] +Building seed filter... [0.009s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.081s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.117s] +Deallocating buffers... [0.058s] +Computing alignments... [1.143s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.02s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.346s +Reported 11705 pairwise alignments, 11759 HSPs. +11705 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/BvE +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.144s] +Masking queries... [0.297s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.129s] +Allocating buffers... [0s] +Loading reference sequences... [0.997s] +Masking reference... [0.638s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.27s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.108s] +Computing hash join... [0.142s] +Building seed filter... [0.005s] +Searching alignments... [0.321s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.229s] +Building query seed array... [0.086s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.265s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.09s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.264s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.203s] +Building query seed array... [0.076s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.274s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.197s] +Building query seed array... [0.075s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.17s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.231s] +Building query seed array... [0.093s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.22s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.229s] +Building query seed array... [0.095s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.203s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.198s] +Building query seed array... [0.079s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.157s] +Deallocating buffers... [0.051s] +Computing alignments... [2.168s] +Deallocating reference... [0.017s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.011s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.967s +Reported 25364 pairwise alignments, 25538 HSPs. +25364 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/Sr_rh_Nn05_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn05_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn10.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn10.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.355s] +Masking queries... [0.376s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.145s] +Allocating buffers... [0s] +Loading reference sequences... [1.106s] +Masking reference... [0.694s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.297s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.237s] +Building query seed array... [0.115s] +Computing hash join... [0.109s] +Building seed filter... [0.006s] +Searching alignments... [0.16s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.244s] +Building query seed array... [0.106s] +Computing hash join... [0.098s] +Building seed filter... [0.008s] +Searching alignments... [0.158s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.256s] +Building query seed array... [0.11s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.104s] +Computing hash join... [0.104s] +Building seed filter... [0.007s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.209s] +Building query seed array... [0.1s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.245s] +Building query seed array... [0.11s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.11s] +Computing hash join... [0.102s] +Building seed filter... [0.007s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.1s] +Computing hash join... [0.099s] +Building seed filter... [0.007s] +Searching alignments... [0.139s] +Deallocating buffers... [0.058s] +Computing alignments... [1.734s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.675s +Reported 14496 pairwise alignments, 14578 HSPs. +14496 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.363s] +Masking queries... [0.364s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.16s] +Allocating buffers... [0s] +Loading reference sequences... [0.984s] +Masking reference... [0.648s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.276s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.114s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.433s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.112s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.341s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.114s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.309s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.199s] +Building query seed array... [0.098s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.362s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.196s] +Building query seed array... [0.096s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.278s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.107s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.262s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.23s] +Building query seed array... [0.115s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.247s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.198s] +Building query seed array... [0.104s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.254s] +Deallocating buffers... [0.05s] +Computing alignments... [3.031s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.018s] +Deallocating queries... [0.019s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 12.87s +Reported 29484 pairwise alignments, 29722 HSPs. +29484 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/Sr_rh_Nn10_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn10_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn03.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn03.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.196s] +Masking queries... [0.334s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.124s] +Allocating buffers... [0s] +Loading reference sequences... [1.09s] +Masking reference... [0.688s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.293s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.231s] +Building query seed array... [0.096s] +Computing hash join... [0.107s] +Building seed filter... [0.008s] +Searching alignments... [0.133s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.24s] +Building query seed array... [0.091s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.133s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.099s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.134s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.083s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.134s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.08s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.117s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.092s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.094s] +Computing hash join... [0.094s] +Building seed filter... [0.005s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.088s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.119s] +Deallocating buffers... [0.058s] +Computing alignments... [1.366s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.678s +Reported 12756 pairwise alignments, 12883 HSPs. +12756 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.195s] +Masking queries... [0.316s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.127s] +Allocating buffers... [0s] +Loading reference sequences... [1.001s] +Masking reference... [0.637s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.267s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.218s] +Building query seed array... [0.107s] +Computing hash join... [0.104s] +Building seed filter... [0.012s] +Searching alignments... [0.298s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.1s] +Computing hash join... [0.094s] +Building seed filter... [0.008s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.246s] +Building query seed array... [0.107s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.231s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.197s] +Building query seed array... [0.089s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.267s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.195s] +Building query seed array... [0.082s] +Computing hash join... [0.087s] +Building seed filter... [0.008s] +Searching alignments... [0.213s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.221s] +Building query seed array... [0.092s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... [0.188s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.101s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... [0.184s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.201s] +Building query seed array... [0.084s] +Computing hash join... [0.086s] +Building seed filter... [0.008s] +Searching alignments... [0.19s] +Deallocating buffers... [0.052s] +Computing alignments... [2.409s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.289s +Reported 26522 pairwise alignments, 26813 HSPs. +26522 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/Sr_rh_Nn03_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn03_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Halb.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Halb.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.186s] +Masking queries... [0.342s] +Building query seed set... [0.066s] +Algorithm: Double-indexed +Building query histograms... [0.137s] +Allocating buffers... [0s] +Loading reference sequences... [1.119s] +Masking reference... [0.698s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.297s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.228s] +Building query seed array... [0.103s] +Computing hash join... [0.109s] +Building seed filter... [0.006s] +Searching alignments... [0.181s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.255s] +Building query seed array... [0.11s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.164s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.257s] +Building query seed array... [0.112s] +Computing hash join... [0.095s] +Building seed filter... [0.007s] +Searching alignments... [0.157s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.096s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.159s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.213s] +Building query seed array... [0.089s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.243s] +Building query seed array... [0.106s] +Computing hash join... [0.092s] +Building seed filter... [0.005s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.107s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.146s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.09s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.146s] +Deallocating buffers... [0.057s] +Computing alignments... [1.406s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.019s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.139s +Reported 9580 pairwise alignments, 9606 HSPs. +9580 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.18s] +Masking queries... [0.34s] +Building query seed set... [0.068s] +Algorithm: Double-indexed +Building query histograms... [0.138s] +Allocating buffers... [0s] +Loading reference sequences... [0.984s] +Masking reference... [0.639s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.28s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.219s] +Building query seed array... [0.103s] +Computing hash join... [0.112s] +Building seed filter... [0.007s] +Searching alignments... [0.19s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.225s] +Building query seed array... [0.112s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.185s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.106s] +Computing hash join... [0.089s] +Building seed filter... [0.005s] +Searching alignments... [0.176s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.199s] +Building query seed array... [0.096s] +Computing hash join... [0.087s] +Building seed filter... [0.005s] +Searching alignments... [0.197s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.2s] +Building query seed array... [0.092s] +Computing hash join... [0.085s] +Building seed filter... [0.005s] +Searching alignments... [0.165s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.225s] +Building query seed array... [0.106s] +Computing hash join... [0.086s] +Building seed filter... [0.006s] +Searching alignments... [0.163s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.23s] +Building query seed array... [0.101s] +Computing hash join... [0.084s] +Building seed filter... [0.006s] +Searching alignments... [0.155s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.092s] +Computing hash join... [0.084s] +Building seed filter... [0.01s] +Searching alignments... [0.147s] +Deallocating buffers... [0.05s] +Computing alignments... [2.243s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.018s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.696s +Reported 20722 pairwise alignments, 20823 HSPs. +20722 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/Sr_rh_Halb_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Halb_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn08.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn08.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.103s] +Masking queries... [0.349s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.109s] +Allocating buffers... [0s] +Loading reference sequences... [1.095s] +Masking reference... [0.693s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.288s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.23s] +Building query seed array... [0.091s] +Computing hash join... [0.117s] +Building seed filter... [0.006s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.083s] +Computing hash join... [0.097s] +Building seed filter... [0.005s] +Searching alignments... [0.108s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.084s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.113s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.069s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.074s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.241s] +Building query seed array... [0.084s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.082s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.103s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.075s] +Computing hash join... [0.096s] +Building seed filter... [0.005s] +Searching alignments... [0.096s] +Deallocating buffers... [0.057s] +Computing alignments... [1.258s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0.011s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.25s +Reported 12050 pairwise alignments, 12165 HSPs. +12050 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.099s] +Masking queries... [0.286s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.126s] +Allocating buffers... [0s] +Loading reference sequences... [0.98s] +Masking reference... [0.639s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.282s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.095s] +Computing hash join... [0.115s] +Building seed filter... [0.006s] +Searching alignments... [0.385s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.078s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.293s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.087s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.273s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.201s] +Building query seed array... [0.07s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.262s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.199s] +Building query seed array... [0.073s] +Computing hash join... [0.094s] +Building seed filter... [0.006s] +Searching alignments... [0.237s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.225s] +Building query seed array... [0.085s] +Computing hash join... [0.086s] +Building seed filter... [0.006s] +Searching alignments... [0.204s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.239s] +Building query seed array... [0.086s] +Computing hash join... [0.089s] +Building seed filter... [0.005s] +Searching alignments... [0.221s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.198s] +Building query seed array... [0.072s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.249s] +Deallocating buffers... [0.052s] +Computing alignments... [2.283s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.188s +Reported 24842 pairwise alignments, 25154 HSPs. +24842 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/Sr_rh_Nn08_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn08_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Tx01.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Tx01.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.36s] +Masking queries... [0.364s] +Building query seed set... [0.057s] +Algorithm: Double-indexed +Building query histograms... [0.136s] +Allocating buffers... [0s] +Loading reference sequences... [1.097s] +Masking reference... [0.692s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.31s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.235s] +Building query seed array... [0.103s] +Computing hash join... [0.097s] +Building seed filter... [0.005s] +Searching alignments... [0.168s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.241s] +Building query seed array... [0.097s] +Computing hash join... [0.086s] +Building seed filter... [0.006s] +Searching alignments... [0.144s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.254s] +Building query seed array... [0.099s] +Computing hash join... [0.083s] +Building seed filter... [0.006s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.086s] +Computing hash join... [0.084s] +Building seed filter... [0.007s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.081s] +Computing hash join... [0.082s] +Building seed filter... [0.005s] +Searching alignments... [0.124s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.1s] +Computing hash join... [0.083s] +Building seed filter... [0.005s] +Searching alignments... [0.126s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.101s] +Computing hash join... [0.082s] +Building seed filter... [0.005s] +Searching alignments... [0.123s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.204s] +Building query seed array... [0.083s] +Computing hash join... [0.081s] +Building seed filter... [0.006s] +Searching alignments... [0.122s] +Deallocating buffers... [0.054s] +Computing alignments... [1.626s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.204s +Reported 11831 pairwise alignments, 11836 HSPs. +11831 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.353s] +Masking queries... [0.399s] +Building query seed set... [0.057s] +Algorithm: Double-indexed +Building query histograms... [0.136s] +Allocating buffers... [0s] +Loading reference sequences... [0.991s] +Masking reference... [0.629s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.301s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.215s] +Building query seed array... [0.098s] +Computing hash join... [0.106s] +Building seed filter... [0.005s] +Searching alignments... [0.227s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.093s] +Computing hash join... [0.078s] +Building seed filter... [0.006s] +Searching alignments... [0.231s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.097s] +Computing hash join... [0.079s] +Building seed filter... [0.006s] +Searching alignments... 
[0.208s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.194s] +Building query seed array... [0.101s] +Computing hash join... [0.083s] +Building seed filter... [0.006s] +Searching alignments... [0.223s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.196s] +Building query seed array... [0.089s] +Computing hash join... [0.078s] +Building seed filter... [0.006s] +Searching alignments... [0.206s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.109s] +Computing hash join... [0.079s] +Building seed filter... [0.006s] +Searching alignments... [0.194s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.235s] +Building query seed array... [0.101s] +Computing hash join... [0.076s] +Building seed filter... [0.006s] +Searching alignments... [0.184s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.089s] +Computing hash join... [0.076s] +Building seed filter... [0.006s] +Searching alignments... [0.179s] +Deallocating buffers... [0.051s] +Computing alignments... [2.893s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.754s +Reported 28648 pairwise alignments, 28728 HSPs. +28648 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/Sr_rh_Tx01_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Tx01_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn07.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn07.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.584s] +Masking queries... [0.4s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.149s] +Allocating buffers... [0s] +Loading reference sequences... [1.076s] +Masking reference... [0.7s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.299s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.239s] +Building query seed array... [0.125s] +Computing hash join... [0.121s] +Building seed filter... [0.008s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.243s] +Building query seed array... [0.113s] +Computing hash join... [0.102s] +Building seed filter... [0.009s] +Searching alignments... [0.16s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.115s] +Computing hash join... [0.104s] +Building seed filter... [0.007s] +Searching alignments... [0.164s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.097s] +Computing hash join... [0.102s] +Building seed filter... [0.007s] +Searching alignments... [0.166s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.105s] +Computing hash join... [0.104s] +Building seed filter... [0.007s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.236s] +Building query seed array... [0.112s] +Computing hash join... [0.101s] +Building seed filter... [0.008s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.12s] +Computing hash join... [0.101s] +Building seed filter... [0.007s] +Searching alignments... [0.146s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.097s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.142s] +Deallocating buffers... [0.058s] +Computing alignments... [1.725s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.019s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.006s +Reported 16673 pairwise alignments, 16752 HSPs. +16673 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.596s] +Masking queries... [0.392s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.148s] +Allocating buffers... [0s] +Loading reference sequences... [0.977s] +Masking reference... [0.637s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.274s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.116s] +Computing hash join... [0.107s] +Building seed filter... [0.009s] +Searching alignments... [0.422s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.114s] +Computing hash join... [0.096s] +Building seed filter... [0.009s] +Searching alignments... [0.35s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.235s] +Building query seed array... [0.118s] +Computing hash join... [0.095s] +Building seed filter... [0.009s] +Searching alignments... [0.287s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.196s] +Building query seed array... [0.102s] +Computing hash join... [0.095s] +Building seed filter... [0.009s] +Searching alignments... [0.318s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.199s] +Building query seed array... [0.098s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.304s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.109s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.286s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.115s] +Computing hash join... [0.096s] +Building seed filter... [0.008s] +Searching alignments... [0.25s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.199s] +Building query seed array... [0.101s] +Computing hash join... [0.095s] +Building seed filter... [0.008s] +Searching alignments... [0.277s] +Deallocating buffers... [0.049s] +Computing alignments... [3.133s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.019s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.276s +Reported 35577 pairwise alignments, 35833 HSPs. +35577 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/Sr_rh_Nn07_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn07_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn01.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn01.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.35s] +Masking queries... [0.382s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.144s] +Allocating buffers... [0s] +Loading reference sequences... [1.137s] +Masking reference... [0.713s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.301s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.239s] +Building query seed array... [0.106s] +Computing hash join... [0.124s] +Building seed filter... [0.009s] +Searching alignments... [0.156s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.239s] +Building query seed array... [0.102s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.152s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.112s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.098s] +Computing hash join... [0.099s] +Building seed filter... [0.007s] +Searching alignments... [0.153s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.102s] +Computing hash join... [0.097s] +Building seed filter... [0.008s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.236s] +Building query seed array... [0.111s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.11s] +Computing hash join... [0.1s] +Building seed filter... [0.008s] +Searching alignments... [0.142s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.102s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.147s] +Deallocating buffers... [0.059s] +Computing alignments... [1.749s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.023s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.738s +Reported 16335 pairwise alignments, 16447 HSPs. +16335 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.37s] +Masking queries... [0.377s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.163s] +Allocating buffers... [0s] +Loading reference sequences... [1.004s] +Masking reference... [0.651s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.275s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.217s] +Building query seed array... [0.111s] +Computing hash join... [0.109s] +Building seed filter... [0.007s] +Searching alignments... [0.411s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.228s] +Building query seed array... [0.105s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.346s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.106s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.337s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.197s] +Building query seed array... [0.093s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.366s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.202s] +Building query seed array... [0.097s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.276s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.106s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.262s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.238s] +Building query seed array... [0.117s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.237s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.099s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.239s] +Deallocating buffers... [0.051s] +Computing alignments... [3.114s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.019s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.004s +Reported 34435 pairwise alignments, 34792 HSPs. +34435 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/Sr_rh_Nn01_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn01_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv04.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv04.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.941s] +Masking queries... [0.466s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.161s] +Allocating buffers... [0s] +Loading reference sequences... [1.227s] +Masking reference... [0.695s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.309s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.228s] +Building query seed array... [0.124s] +Computing hash join... [0.118s] +Building seed filter... [0.007s] +Searching alignments... [0.178s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.241s] +Building query seed array... [0.115s] +Computing hash join... [0.096s] +Building seed filter... [0.008s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.116s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.18s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.102s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.213s] +Building query seed array... [0.105s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.159s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.114s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.156s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.117s] +Computing hash join... [0.095s] +Building seed filter... [0.007s] +Searching alignments... [0.155s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.108s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.15s] +Deallocating buffers... [0.057s] +Computing alignments... [1.629s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.017s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.591s +Reported 16731 pairwise alignments, 16862 HSPs. +16731 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.937s] +Masking queries... [0.481s] +Building query seed set... [0.051s] +Algorithm: Double-indexed +Building query histograms... [0.175s] +Allocating buffers... [0s] +Loading reference sequences... [0.984s] +Masking reference... [0.641s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.275s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.218s] +Building query seed array... [0.129s] +Computing hash join... [0.112s] +Building seed filter... [0.01s] +Searching alignments... [0.36s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.226s] +Building query seed array... [0.114s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.31s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.116s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.265s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.2s] +Building query seed array... [0.108s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.322s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.197s] +Building query seed array... [0.11s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.229s] +Building query seed array... [0.116s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.249s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.124s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.248s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.2s] +Building query seed array... [0.107s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.237s] +Deallocating buffers... [0.052s] +Computing alignments... [3.118s] +Deallocating reference... [0.014s] +Loading reference sequences... [0s] +Deallocating buffers... [0.017s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.541s +Reported 33330 pairwise alignments, 33675 HSPs. +33330 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/Sr_rh_Bv04_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv04_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Usac.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Usac.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.916s] +Masking queries... [0.261s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.109s] +Allocating buffers... [0s] +Loading reference sequences... [1.093s] +Masking reference... [0.695s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.298s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.236s] +Building query seed array... [0.087s] +Computing hash join... [0.124s] +Building seed filter... [0.006s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.08s] +Computing hash join... [0.1s] +Building seed filter... [0.007s] +Searching alignments... [0.138s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.084s] +Computing hash join... [0.097s] +Building seed filter... [0.008s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.071s] +Computing hash join... [0.095s] +Building seed filter... [0.007s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.074s] +Computing hash join... [0.103s] +Building seed filter... [0.006s] +Searching alignments... [0.127s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.235s] +Building query seed array... [0.076s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.123s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.08s] +Computing hash join... [0.099s] +Building seed filter... [0.007s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.065s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.121s] +Deallocating buffers... [0.054s] +Computing alignments... [1.485s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.013s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.432s +Reported 18730 pairwise alignments, 18769 HSPs. +18730 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.923s] +Masking queries... [0.263s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.098s] +Allocating buffers... [0s] +Loading reference sequences... [0.988s] +Masking reference... [0.64s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.269s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.087s] +Computing hash join... [0.107s] +Building seed filter... [0.007s] +Searching alignments... [0.263s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.24s] +Building query seed array... [0.083s] +Computing hash join... [0.1s] +Building seed filter... [0.006s] +Searching alignments... [0.253s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.087s] +Computing hash join... [0.097s] +Building seed filter... [0.008s] +Searching alignments... [0.231s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.2s] +Building query seed array... [0.069s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.255s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.203s] +Building query seed array... [0.072s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.217s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.077s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.178s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.239s] +Building query seed array... [0.087s] +Computing hash join... [0.097s] +Building seed filter... [0.005s] +Searching alignments... [0.181s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.193s] +Building query seed array... [0.071s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.177s] +Deallocating buffers... [0.053s] +Computing alignments... [2.193s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.523s +Reported 40730 pairwise alignments, 40859 HSPs. +40730 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/Sr_rh_Usac_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Usac_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn04.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn04.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.191s] +Masking queries... [0.615s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.201s] +Allocating buffers... [0s] +Loading reference sequences... [1.266s] +Masking reference... [0.692s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.331s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.231s] +Building query seed array... [0.161s] +Computing hash join... [0.135s] +Building seed filter... [0.009s] +Searching alignments... [0.231s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.249s] +Building query seed array... [0.148s] +Computing hash join... [0.111s] +Building seed filter... [0.009s] +Searching alignments... [0.197s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.156s] +Computing hash join... [0.112s] +Building seed filter... [0.009s] +Searching alignments... [0.205s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.135s] +Computing hash join... [0.112s] +Building seed filter... [0.009s] +Searching alignments... [0.207s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.136s] +Computing hash join... [0.115s] +Building seed filter... [0.014s] +Searching alignments... [0.192s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.153s] +Computing hash join... [0.12s] +Building seed filter... [0.009s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.252s] +Building query seed array... [0.155s] +Computing hash join... [0.109s] +Building seed filter... [0.008s] +Searching alignments... [0.198s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.215s] +Building query seed array... [0.135s] +Computing hash join... [0.118s] +Building seed filter... [0.008s] +Searching alignments... [0.181s] +Deallocating buffers... [0.059s] +Computing alignments... [2.565s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0.023s] +Deallocating queries... [0.021s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.715s +Reported 27246 pairwise alignments, 27470 HSPs. +27246 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.212s] +Masking queries... [0.606s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.199s] +Allocating buffers... [0s] +Loading reference sequences... [1.006s] +Masking reference... [0.651s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.285s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.217s] +Building query seed array... [0.152s] +Computing hash join... [0.111s] +Building seed filter... [0.007s] +Searching alignments... [0.637s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.156s] +Computing hash join... [0.104s] +Building seed filter... [0.01s] +Searching alignments... [0.491s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.15s] +Computing hash join... [0.106s] +Building seed filter... [0.009s] +Searching alignments... 
[0.405s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.194s] +Building query seed array... [0.13s] +Computing hash join... [0.101s] +Building seed filter... [0.007s] +Searching alignments... [0.545s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.198s] +Building query seed array... [0.132s] +Computing hash join... [0.101s] +Building seed filter... [0.009s] +Searching alignments... [0.449s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.151s] +Computing hash join... [0.1s] +Building seed filter... [0.01s] +Searching alignments... [0.443s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.235s] +Building query seed array... [0.153s] +Computing hash join... [0.103s] +Building seed filter... [0.01s] +Searching alignments... [0.358s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.144s] +Computing hash join... [0.103s] +Building seed filter... [0.008s] +Searching alignments... [0.407s] +Deallocating buffers... [0.052s] +Computing alignments... [4.479s] +Deallocating reference... [0.014s] +Loading reference sequences... [0s] +Deallocating buffers... [0.024s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 17.183s +Reported 56655 pairwise alignments, 57309 HSPs. +56655 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/Sr_rh_Nn04_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn04_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Emar.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Emar.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.531s] +Masking queries... [0.153s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.07s] +Allocating buffers... [0s] +Loading reference sequences... [1.189s] +Masking reference... [0.687s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.301s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.057s] +Computing hash join... [0.121s] +Building seed filter... [0.005s] +Searching alignments... [0.079s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.239s] +Building query seed array... [0.054s] +Computing hash join... [0.088s] +Building seed filter... [0.004s] +Searching alignments... [0.078s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.059s] +Computing hash join... [0.085s] +Building seed filter... [0.006s] +Searching alignments... [0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.204s] +Building query seed array... [0.047s] +Computing hash join... [0.079s] +Building seed filter... [0.006s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.206s] +Building query seed array... [0.053s] +Computing hash join... [0.083s] +Building seed filter... [0.005s] +Searching alignments... [0.075s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.059s] +Computing hash join... [0.085s] +Building seed filter... [0.005s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.243s] +Building query seed array... [0.054s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.069s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.059s] +Computing hash join... [0.087s] +Building seed filter... [0.005s] +Searching alignments... [0.065s] +Deallocating buffers... [0.056s] +Computing alignments... [0.634s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.005s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.328s +Reported 5342 pairwise alignments, 5400 HSPs. +5342 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.534s] +Masking queries... [0.167s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.067s] +Allocating buffers... [0s] +Loading reference sequences... [0.981s] +Masking reference... [0.636s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.274s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.067s] +Computing hash join... [0.105s] +Building seed filter... [0.005s] +Searching alignments... [0.102s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.064s] +Computing hash join... [0.082s] +Building seed filter... [0.005s] +Searching alignments... [0.101s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.237s] +Building query seed array... [0.056s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... 
[0.092s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.052s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.097s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.191s] +Building query seed array... [0.051s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.085s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.216s] +Building query seed array... [0.057s] +Computing hash join... [0.081s] +Building seed filter... [0.006s] +Searching alignments... [0.083s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.056s] +Computing hash join... [0.08s] +Building seed filter... [0.005s] +Searching alignments... [0.08s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.043s] +Computing hash join... [0.078s] +Building seed filter... [0.005s] +Searching alignments... [0.087s] +Deallocating buffers... [0.051s] +Computing alignments... [1.093s] +Deallocating reference... [0.017s] +Loading reference sequences... [0s] +Deallocating buffers... [0.008s] +Deallocating queries... [0.008s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.522s +Reported 11056 pairwise alignments, 11214 HSPs. +11056 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/Sr_rh_Emar_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Emar_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Rsp1.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Rsp1.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.033s] +Masking queries... [0.559s] +Building query seed set... [0.049s] +Algorithm: Double-indexed +Building query histograms... [0.094s] +Allocating buffers... [0s] +Loading reference sequences... [1.11s] +Masking reference... [0.694s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.319s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.232s] +Building query seed array... [0.075s] +Computing hash join... [0.125s] +Building seed filter... [0.007s] +Searching alignments... [0.126s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.243s] +Building query seed array... [0.065s] +Computing hash join... [0.105s] +Building seed filter... [0.005s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.075s] +Computing hash join... [0.104s] +Building seed filter... [0.005s] +Searching alignments... [0.118s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.217s] +Building query seed array... [0.077s] +Computing hash join... [0.1s] +Building seed filter... [0.005s] +Searching alignments... [0.115s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.223s] +Building query seed array... [0.063s] +Computing hash join... [0.091s] +Building seed filter... [0.005s] +Searching alignments... [0.098s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.239s] +Building query seed array... [0.069s] +Computing hash join... [0.097s] +Building seed filter... [0.005s] +Searching alignments... [0.1s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.244s] +Building query seed array... [0.069s] +Computing hash join... [0.092s] +Building seed filter... [0.005s] +Searching alignments... [0.1s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.206s] +Building query seed array... [0.059s] +Computing hash join... [0.091s] +Building seed filter... [0.005s] +Searching alignments... [0.097s] +Deallocating buffers... [0.058s] +Computing alignments... [1.22s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.348s +Reported 17091 pairwise alignments, 17125 HSPs. +17091 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.034s] +Masking queries... [0.266s] +Building query seed set... [0.049s] +Algorithm: Double-indexed +Building query histograms... [0.093s] +Allocating buffers... [0s] +Loading reference sequences... [0.989s] +Masking reference... [0.658s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.292s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.222s] +Building query seed array... [0.079s] +Computing hash join... [0.132s] +Building seed filter... [0.005s] +Searching alignments... [0.202s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.069s] +Computing hash join... [0.1s] +Building seed filter... [0.006s] +Searching alignments... [0.201s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.238s] +Building query seed array... [0.067s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.2s] +Building query seed array... [0.064s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.181s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.199s] +Building query seed array... [0.058s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.065s] +Computing hash join... [0.092s] +Building seed filter... [0.005s] +Searching alignments... [0.177s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.228s] +Building query seed array... [0.065s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.154s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.194s] +Building query seed array... [0.062s] +Computing hash join... [0.087s] +Building seed filter... [0.005s] +Searching alignments... [0.154s] +Deallocating buffers... [0.052s] +Computing alignments... [2.101s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.112s +Reported 36259 pairwise alignments, 36348 HSPs. +36259 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/Sr_rh_Rsp1_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Rsp1_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Sspa.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Sspa.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.778s] +Masking queries... [0.215s] +Building query seed set... [0.043s] +Algorithm: Double-indexed +Building query histograms... [0.081s] +Allocating buffers... [0s] +Loading reference sequences... [1.097s] +Masking reference... [0.696s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.303s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.232s] +Building query seed array... [0.068s] +Computing hash join... [0.132s] +Building seed filter... [0.006s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.24s] +Building query seed array... [0.062s] +Computing hash join... [0.108s] +Building seed filter... [0.006s] +Searching alignments... [0.117s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.256s] +Building query seed array... [0.064s] +Computing hash join... [0.101s] +Building seed filter... [0.006s] +Searching alignments... [0.105s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.054s] +Computing hash join... [0.098s] +Building seed filter... [0.006s] +Searching alignments... [0.107s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.219s] +Building query seed array... [0.054s] +Computing hash join... [0.094s] +Building seed filter... [0.005s] +Searching alignments... [0.1s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.245s] +Building query seed array... [0.068s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.256s] +Building query seed array... [0.065s] +Computing hash join... [0.092s] +Building seed filter... [0.006s] +Searching alignments... [0.093s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.206s] +Building query seed array... [0.057s] +Computing hash join... [0.095s] +Building seed filter... [0.005s] +Searching alignments... [0.098s] +Deallocating buffers... [0.056s] +Computing alignments... [1.085s] +Deallocating reference... [0.019s] +Loading reference sequences... [0s] +Deallocating buffers... [0.008s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.486s +Reported 10845 pairwise alignments, 10920 HSPs. +10845 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.783s] +Masking queries... [0.228s] +Building query seed set... [0.043s] +Algorithm: Double-indexed +Building query histograms... [0.081s] +Allocating buffers... [0s] +Loading reference sequences... [0.979s] +Masking reference... [0.641s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.291s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.219s] +Building query seed array... [0.07s] +Computing hash join... [0.109s] +Building seed filter... [0.005s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.058s] +Computing hash join... [0.096s] +Building seed filter... [0.005s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.23s] +Building query seed array... [0.066s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.135s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.198s] +Building query seed array... [0.056s] +Computing hash join... [0.093s] +Building seed filter... [0.005s] +Searching alignments... [0.144s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.193s] +Building query seed array... [0.055s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.124s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.057s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.132s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.061s] +Computing hash join... [0.087s] +Building seed filter... [0.006s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.056s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... [0.119s] +Deallocating buffers... [0.05s] +Computing alignments... [1.576s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.009s] +Deallocating queries... [0.011s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 8.814s +Reported 23074 pairwise alignments, 23301 HSPs. +23074 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/Sr_rh_Sspa_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Sspa_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Hhir.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Hhir.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.628s] +Masking queries... [0.179s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.08s] +Allocating buffers... [0s] +Loading reference sequences... [1.105s] +Masking reference... [0.806s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.304s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.241s] +Building query seed array... [0.061s] +Computing hash join... [0.107s] +Building seed filter... [0.006s] +Searching alignments... [0.111s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.243s] +Building query seed array... [0.055s] +Computing hash join... [0.084s] +Building seed filter... [0.005s] +Searching alignments... [0.079s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.056s] +Computing hash join... [0.077s] +Building seed filter... [0.005s] +Searching alignments... [0.081s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.053s] +Computing hash join... [0.078s] +Building seed filter... [0.005s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.055s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.072s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.053s] +Computing hash join... [0.074s] +Building seed filter... [0.005s] +Searching alignments... [0.081s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.057s] +Computing hash join... [0.074s] +Building seed filter... [0.005s] +Searching alignments... [0.079s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.055s] +Computing hash join... [0.077s] +Building seed filter... [0.004s] +Searching alignments... [0.074s] +Deallocating buffers... [0.056s] +Computing alignments... [0.796s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.007s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.726s +Reported 6766 pairwise alignments, 6777 HSPs. +6766 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.632s] +Masking queries... [0.235s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.074s] +Allocating buffers... [0s] +Loading reference sequences... [1.024s] +Masking reference... [1.658s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.282s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.222s] +Building query seed array... [0.067s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.054s] +Computing hash join... [0.081s] +Building seed filter... [0.005s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.059s] +Computing hash join... [0.075s] +Building seed filter... [0.005s] +Searching alignments... 
[0.107s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.05s] +Computing hash join... [0.076s] +Building seed filter... [0.006s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.195s] +Building query seed array... [0.054s] +Computing hash join... [0.076s] +Building seed filter... [0.004s] +Searching alignments... [0.101s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.221s] +Building query seed array... [0.054s] +Computing hash join... [0.07s] +Building seed filter... [0.005s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.227s] +Building query seed array... [0.06s] +Computing hash join... [0.072s] +Building seed filter... [0.005s] +Searching alignments... [0.101s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.05s] +Computing hash join... [0.071s] +Building seed filter... [0.005s] +Searching alignments... [0.091s] +Deallocating buffers... [0.053s] +Computing alignments... [1.324s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.01s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.073s +Reported 14202 pairwise alignments, 14244 HSPs. +14202 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/Sr_rh_Hhir_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Hhir_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn12.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn12.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.207s] +Masking queries... [0.31s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.142s] +Allocating buffers... [0s] +Loading reference sequences... [1.123s] +Masking reference... [0.708s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.292s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.227s] +Building query seed array... [0.1s] +Computing hash join... [0.109s] +Building seed filter... [0.005s] +Searching alignments... [0.135s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.24s] +Building query seed array... [0.095s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.127s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.092s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.132s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.084s] +Computing hash join... [0.096s] +Building seed filter... [0.005s] +Searching alignments... [0.128s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.084s] +Computing hash join... [0.095s] +Building seed filter... [0.007s] +Searching alignments... [0.119s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.092s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.116s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.251s] +Building query seed array... [0.097s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.088s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.116s] +Deallocating buffers... [0.056s] +Computing alignments... [1.169s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.013s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... 
[0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.541s +Reported 12180 pairwise alignments, 12261 HSPs. +12180 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.216s] +Masking queries... [0.327s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.124s] +Allocating buffers... [0s] +Loading reference sequences... [0.989s] +Masking reference... [0.637s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.275s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.222s] +Building query seed array... [0.091s] +Computing hash join... [0.096s] +Building seed filter... [0.005s] +Searching alignments... [0.209s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.23s] +Building query seed array... [0.099s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.195s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.103s] +Computing hash join... [0.089s] +Building seed filter... [0.006s] +Searching alignments... 
[0.178s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.192s] +Building query seed array... [0.084s] +Computing hash join... [0.091s] +Building seed filter... [0.006s] +Searching alignments... [0.179s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.196s] +Building query seed array... [0.089s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.168s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.247s] +Building query seed array... [0.115s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.175s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.097s] +Computing hash join... [0.087s] +Building seed filter... [0.007s] +Searching alignments... [0.166s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.191s] +Building query seed array... [0.09s] +Computing hash join... [0.092s] +Building seed filter... [0.007s] +Searching alignments... [0.159s] +Deallocating buffers... [0.053s] +Computing alignments... [2.15s] +Deallocating reference... [0.017s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.621s +Reported 26508 pairwise alignments, 26714 HSPs. +26508 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/Sr_rh_Nn12_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn12_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn14.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn14.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.174s] +Masking queries... [0.328s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.13s] +Allocating buffers... [0s] +Loading reference sequences... [1.103s] +Masking reference... [0.708s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.315s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.238s] +Building query seed array... [0.101s] +Computing hash join... [0.11s] +Building seed filter... [0.007s] +Searching alignments... [0.135s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.239s] +Building query seed array... [0.093s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.128s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.093s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.131s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.087s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.129s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.209s] +Building query seed array... [0.085s] +Computing hash join... [0.098s] +Building seed filter... [0.006s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.249s] +Building query seed array... [0.099s] +Computing hash join... [0.095s] +Building seed filter... [0.005s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.093s] +Computing hash join... [0.098s] +Building seed filter... [0.006s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.209s] +Building query seed array... [0.078s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.115s] +Deallocating buffers... [0.058s] +Computing alignments... [1.155s] +Deallocating reference... [0.02s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.521s +Reported 12561 pairwise alignments, 12646 HSPs. +12561 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.193s] +Masking queries... [0.313s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.136s] +Allocating buffers... [0s] +Loading reference sequences... [1.003s] +Masking reference... [0.644s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.283s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.217s] +Building query seed array... [0.093s] +Computing hash join... [0.096s] +Building seed filter... [0.008s] +Searching alignments... [0.331s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.099s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.196s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.099s] +Computing hash join... [0.087s] +Building seed filter... [0.007s] +Searching alignments... [0.179s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.198s] +Building query seed array... [0.082s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.195s] +Building query seed array... [0.093s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.155s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.218s] +Building query seed array... [0.097s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.097s] +Computing hash join... [0.087s] +Building seed filter... [0.007s] +Searching alignments... [0.159s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.083s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.158s] +Deallocating buffers... [0.049s] +Computing alignments... [2.103s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.601s +Reported 27283 pairwise alignments, 27554 HSPs. +27283 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/Sr_rh_Nn14_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn14_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Gl02.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Gl02.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.204s] +Masking queries... [0.607s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.22s] +Allocating buffers... [0s] +Loading reference sequences... [1.093s] +Masking reference... [0.691s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.31s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.236s] +Building query seed array... [0.156s] +Computing hash join... [0.138s] +Building seed filter... [0.01s] +Searching alignments... [0.229s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.146s] +Computing hash join... [0.11s] +Building seed filter... [0.012s] +Searching alignments... [0.227s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.148s] +Computing hash join... [0.122s] +Building seed filter... [0.009s] +Searching alignments... [0.228s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.133s] +Computing hash join... [0.109s] +Building seed filter... [0.01s] +Searching alignments... [0.216s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.213s] +Building query seed array... [0.142s] +Computing hash join... [0.113s] +Building seed filter... [0.009s] +Searching alignments... [0.2s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.149s] +Computing hash join... [0.113s] +Building seed filter... [0.009s] +Searching alignments... [0.203s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.157s] +Computing hash join... [0.111s] +Building seed filter... [0.009s] +Searching alignments... [0.194s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.133s] +Computing hash join... [0.109s] +Building seed filter... [0.008s] +Searching alignments... [0.192s] +Deallocating buffers... [0.059s] +Computing alignments... [2.363s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.024s] +Deallocating queries... [0.024s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.428s +Reported 23866 pairwise alignments, 23926 HSPs. +23866 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.219s] +Masking queries... [0.556s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.203s] +Allocating buffers... [0s] +Loading reference sequences... [0.998s] +Masking reference... [0.642s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.287s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.214s] +Building query seed array... [0.154s] +Computing hash join... [0.112s] +Building seed filter... [0.01s] +Searching alignments... [0.337s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.153s] +Computing hash join... [0.106s] +Building seed filter... [0.011s] +Searching alignments... [0.315s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.235s] +Building query seed array... [0.158s] +Computing hash join... [0.104s] +Building seed filter... [0.008s] +Searching alignments... [0.305s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.222s] +Building query seed array... [0.142s] +Computing hash join... [0.109s] +Building seed filter... [0.007s] +Searching alignments... [0.315s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.135s] +Computing hash join... [0.103s] +Building seed filter... [0.01s] +Searching alignments... [0.271s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.152s] +Computing hash join... [0.105s] +Building seed filter... [0.01s] +Searching alignments... [0.287s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.23s] +Building query seed array... [0.154s] +Computing hash join... [0.109s] +Building seed filter... [0.009s] +Searching alignments... [0.271s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.139s] +Computing hash join... [0.106s] +Building seed filter... [0.01s] +Searching alignments... [0.261s] +Deallocating buffers... [0.049s] +Computing alignments... [4.193s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.024s] +Deallocating queries... [0.028s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 15.549s +Reported 46583 pairwise alignments, 46748 HSPs. +46583 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/Sr_rh_Gl02_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Gl02_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv05.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv05.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.826s] +Masking queries... [0.521s] +Building query seed set... [0.048s] +Algorithm: Double-indexed +Building query histograms... [0.15s] +Allocating buffers... [0s] +Loading reference sequences... [1.084s] +Masking reference... [0.692s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.299s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.236s] +Building query seed array... [0.124s] +Computing hash join... [0.115s] +Building seed filter... [0.007s] +Searching alignments... [0.182s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.242s] +Building query seed array... [0.111s] +Computing hash join... [0.095s] +Building seed filter... [0.008s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.255s] +Building query seed array... [0.113s] +Computing hash join... [0.098s] +Building seed filter... [0.008s] +Searching alignments... [0.171s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.098s] +Computing hash join... [0.097s] +Building seed filter... [0.009s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.101s] +Computing hash join... [0.098s] +Building seed filter... [0.008s] +Searching alignments... [0.163s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.241s] +Building query seed array... [0.108s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.247s] +Building query seed array... [0.112s] +Computing hash join... [0.097s] +Building seed filter... [0.007s] +Searching alignments... [0.152s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.098s] +Computing hash join... [0.096s] +Building seed filter... [0.009s] +Searching alignments... [0.147s] +Deallocating buffers... [0.06s] +Computing alignments... [1.576s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.019s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.239s +Reported 16723 pairwise alignments, 16853 HSPs. +16723 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/BvE +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.809s] +Masking queries... [0.438s] +Building query seed set... [0.048s] +Algorithm: Double-indexed +Building query histograms... [0.148s] +Allocating buffers... [0s] +Loading reference sequences... [0.987s] +Masking reference... [0.634s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.266s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.226s] +Building query seed array... [0.116s] +Computing hash join... [0.108s] +Building seed filter... [0.007s] +Searching alignments... [0.315s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.228s] +Building query seed array... [0.106s] +Computing hash join... [0.088s] +Building seed filter... [0.007s] +Searching alignments... [0.285s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.111s] +Computing hash join... [0.089s] +Building seed filter... [0.008s] +Searching alignments... [0.266s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.197s] +Building query seed array... [0.099s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.324s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.201s] +Building query seed array... [0.098s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.248s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.111s] +Computing hash join... [0.09s] +Building seed filter... [0.006s] +Searching alignments... [0.243s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.232s] +Building query seed array... [0.11s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.249s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.201s] +Building query seed array... [0.1s] +Computing hash join... [0.088s] +Building seed filter... [0.006s] +Searching alignments... [0.231s] +Deallocating buffers... [0.052s] +Computing alignments... [3.101s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.126s +Reported 34038 pairwise alignments, 34375 HSPs. +34038 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/Sr_rh_Bv05_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv05_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Bv01.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Bv01.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.556s] +Masking queries... [0.392s] +Building query seed set... [0.047s] +Algorithm: Double-indexed +Building query histograms... [0.126s] +Allocating buffers... [0s] +Loading reference sequences... [1.102s] +Masking reference... [0.703s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.301s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.235s] +Building query seed array... [0.093s] +Computing hash join... [0.113s] +Building seed filter... [0.007s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.097s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.25s] +Building query seed array... [0.095s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.154s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.212s] +Building query seed array... [0.079s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.149s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.217s] +Building query seed array... [0.081s] +Computing hash join... [0.093s] +Building seed filter... [0.006s] +Searching alignments... [0.126s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.238s] +Building query seed array... [0.09s] +Computing hash join... [0.097s] +Building seed filter... [0.006s] +Searching alignments... [0.131s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.09s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.131s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.211s] +Building query seed array... [0.082s] +Computing hash join... [0.094s] +Building seed filter... [0.007s] +Searching alignments... [0.129s] +Deallocating buffers... [0.056s] +Computing alignments... [1.438s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.329s +Reported 17468 pairwise alignments, 17618 HSPs. +17468 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.559s] +Masking queries... [0.421s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.123s] +Allocating buffers... [0s] +Loading reference sequences... [0.998s] +Masking reference... [0.638s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.275s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.222s] +Building query seed array... [0.099s] +Computing hash join... [0.116s] +Building seed filter... [0.01s] +Searching alignments... [0.318s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.237s] +Building query seed array... [0.097s] +Computing hash join... [0.096s] +Building seed filter... [0.007s] +Searching alignments... [0.276s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.238s] +Building query seed array... [0.099s] +Computing hash join... [0.092s] +Building seed filter... [0.009s] +Searching alignments... [0.286s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.202s] +Building query seed array... [0.086s] +Computing hash join... [0.09s] +Building seed filter... [0.008s] +Searching alignments... [0.309s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.199s] +Building query seed array... [0.077s] +Computing hash join... [0.092s] +Building seed filter... [0.008s] +Searching alignments... [0.244s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.086s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.207s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.093s] +Computing hash join... [0.095s] +Building seed filter... [0.008s] +Searching alignments... [0.204s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.08s] +Computing hash join... [0.093s] +Building seed filter... [0.008s] +Searching alignments... [0.204s] +Deallocating buffers... [0.05s] +Computing alignments... [2.675s] +Deallocating reference... [0.022s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 12.237s +Reported 35541 pairwise alignments, 35937 HSPs. +35541 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/Sr_rh_Bv01_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Bv01_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Slin.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Slin.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.36s] +Masking queries... [0.117s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.046s] +Allocating buffers... [0s] +Loading reference sequences... [1.089s] +Masking reference... [0.702s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.315s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.232s] +Building query seed array... [0.055s] +Computing hash join... [0.081s] +Building seed filter... [0.004s] +Searching alignments... [0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.246s] +Building query seed array... [0.038s] +Computing hash join... [0.065s] +Building seed filter... [0.005s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.253s] +Building query seed array... [0.031s] +Computing hash join... [0.058s] +Building seed filter... [0.005s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.04s] +Computing hash join... [0.056s] +Building seed filter... [0.004s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.224s] +Building query seed array... [0.038s] +Computing hash join... [0.056s] +Building seed filter... [0.004s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.233s] +Building query seed array... [0.036s] +Computing hash join... [0.05s] +Building seed filter... [0.004s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.248s] +Building query seed array... [0.044s] +Computing hash join... [0.054s] +Building seed filter... [0.004s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.208s] +Building query seed array... [0.038s] +Computing hash join... [0.053s] +Building seed filter... [0.004s] +Searching alignments... [0.044s] +Deallocating buffers... [0.055s] +Computing alignments... [0.446s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.003s] +Deallocating queries... [0.005s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 6.344s +Reported 3720 pairwise alignments, 3786 HSPs. +3720 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.379s] +Masking queries... [0.163s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.046s] +Allocating buffers... [0s] +Loading reference sequences... [0.992s] +Masking reference... [0.639s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.286s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.049s] +Computing hash join... [0.074s] +Building seed filter... [0.005s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.222s] +Building query seed array... [0.036s] +Computing hash join... [0.054s] +Building seed filter... [0.005s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.23s] +Building query seed array... [0.034s] +Computing hash join... [0.052s] +Building seed filter... [0.005s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.192s] +Building query seed array... [0.038s] +Computing hash join... [0.05s] +Building seed filter... [0.004s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.191s] +Building query seed array... [0.035s] +Computing hash join... [0.05s] +Building seed filter... [0.005s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.213s] +Building query seed array... [0.039s] +Computing hash join... [0.05s] +Building seed filter... [0.005s] +Searching alignments... [0.069s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.231s] +Building query seed array... [0.036s] +Computing hash join... [0.05s] +Building seed filter... [0.005s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.195s] +Building query seed array... [0.036s] +Computing hash join... [0.051s] +Building seed filter... [0.004s] +Searching alignments... [0.066s] +Deallocating buffers... [0.051s] +Computing alignments... [0.736s] +Deallocating reference... [0.017s] +Loading reference sequences... [0s] +Deallocating buffers... [0.003s] +Deallocating queries... [0.005s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 6.473s +Reported 8131 pairwise alignments, 8278 HSPs. +8131 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/Sr_rh_Slin_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Slin_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Gl03.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Gl03.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.745s] +Masking queries... [0.498s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.179s] +Allocating buffers... [0s] +Loading reference sequences... [1.084s] +Masking reference... [0.714s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.306s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.233s] +Building query seed array... [0.138s] +Computing hash join... [0.135s] +Building seed filter... [0.011s] +Searching alignments... [0.228s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.248s] +Building query seed array... [0.133s] +Computing hash join... [0.113s] +Building seed filter... [0.009s] +Searching alignments... [0.215s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.254s] +Building query seed array... [0.138s] +Computing hash join... [0.118s] +Building seed filter... [0.009s] +Searching alignments... [0.237s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.229s] +Building query seed array... [0.127s] +Computing hash join... [0.114s] +Building seed filter... [0.008s] +Searching alignments... [0.216s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.208s] +Building query seed array... [0.116s] +Computing hash join... [0.111s] +Building seed filter... [0.008s] +Searching alignments... [0.195s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.244s] +Building query seed array... [0.128s] +Computing hash join... [0.111s] +Building seed filter... [0.009s] +Searching alignments... [0.193s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.245s] +Building query seed array... [0.13s] +Computing hash join... [0.11s] +Building seed filter... [0.009s] +Searching alignments... [0.196s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.114s] +Computing hash join... [0.111s] +Building seed filter... [0.008s] +Searching alignments... [0.191s] +Deallocating buffers... [0.059s] +Computing alignments... [2.383s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.022s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 12.681s +Reported 24064 pairwise alignments, 24123 HSPs. +24064 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.762s] +Masking queries... [0.457s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.176s] +Allocating buffers... [0s] +Loading reference sequences... [0.971s] +Masking reference... [0.64s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.287s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.211s] +Building query seed array... [0.128s] +Computing hash join... [0.12s] +Building seed filter... [0.009s] +Searching alignments... [0.31s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.224s] +Building query seed array... [0.127s] +Computing hash join... [0.104s] +Building seed filter... [0.01s] +Searching alignments... [0.308s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.234s] +Building query seed array... [0.133s] +Computing hash join... [0.105s] +Building seed filter... [0.009s] +Searching alignments... [0.287s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.205s] +Building query seed array... [0.116s] +Computing hash join... [0.102s] +Building seed filter... [0.008s] +Searching alignments... [0.288s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.199s] +Building query seed array... [0.12s] +Computing hash join... [0.101s] +Building seed filter... [0.008s] +Searching alignments... [0.253s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.225s] +Building query seed array... [0.129s] +Computing hash join... [0.102s] +Building seed filter... [0.009s] +Searching alignments... [0.246s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.236s] +Building query seed array... [0.145s] +Computing hash join... [0.106s] +Building seed filter... [0.009s] +Searching alignments... [0.249s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.196s] +Building query seed array... [0.116s] +Computing hash join... [0.105s] +Building seed filter... [0.009s] +Searching alignments... [0.242s] +Deallocating buffers... [0.05s] +Computing alignments... [3.854s] +Deallocating reference... [0.024s] +Loading reference sequences... [0s] +Deallocating buffers... [0.022s] +Deallocating queries... [0.022s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 14.194s +Reported 49617 pairwise alignments, 49761 HSPs. +49617 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/Sr_rh_Gl03_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Gl03_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn09.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn09.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.803s] +Masking queries... [0.454s] +Building query seed set... [0.029s] +Algorithm: Double-indexed +Building query histograms... [0.181s] +Allocating buffers... [0s] +Loading reference sequences... [1.117s] +Masking reference... [1.661s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.319s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.238s] +Building query seed array... [0.132s] +Computing hash join... [0.121s] +Building seed filter... [0.007s] +Searching alignments... [0.188s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.24s] +Building query seed array... [0.136s] +Computing hash join... [0.11s] +Building seed filter... [0.007s] +Searching alignments... [0.178s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.246s] +Building query seed array... [0.134s] +Computing hash join... [0.106s] +Building seed filter... [0.007s] +Searching alignments... [0.178s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.213s] +Building query seed array... [0.119s] +Computing hash join... [0.104s] +Building seed filter... [0.007s] +Searching alignments... [0.187s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.212s] +Building query seed array... [0.116s] +Computing hash join... [0.108s] +Building seed filter... [0.008s] +Searching alignments... [0.165s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.246s] +Building query seed array... [0.135s] +Computing hash join... [0.104s] +Building seed filter... [0.007s] +Searching alignments... [0.166s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.254s] +Building query seed array... [0.135s] +Computing hash join... [0.107s] +Building seed filter... [0.008s] +Searching alignments... [0.16s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.121s] +Computing hash join... [0.106s] +Building seed filter... [0.008s] +Searching alignments... [0.157s] +Deallocating buffers... [0.056s] +Computing alignments... [2.04s] +Deallocating reference... [0.026s] +Loading reference sequences... [0s] +Deallocating buffers... [0.02s] +Deallocating queries... [0.021s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 12.971s +Reported 21847 pairwise alignments, 22013 HSPs. +21847 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.803s] +Masking queries... [0.461s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.18s] +Allocating buffers... [0s] +Loading reference sequences... [0.989s] +Masking reference... [0.651s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.293s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.22s] +Building query seed array... [0.15s] +Computing hash join... [0.121s] +Building seed filter... [0.008s] +Searching alignments... [0.61s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.23s] +Building query seed array... [0.136s] +Computing hash join... [0.102s] +Building seed filter... [0.008s] +Searching alignments... [0.325s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.134s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.414s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.2s] +Building query seed array... [0.116s] +Computing hash join... [0.101s] +Building seed filter... [0.008s] +Searching alignments... [0.615s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.191s] +Building query seed array... [0.12s] +Computing hash join... [0.1s] +Building seed filter... [0.008s] +Searching alignments... [0.442s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.22s] +Building query seed array... [0.131s] +Computing hash join... [0.103s] +Building seed filter... [0.008s] +Searching alignments... [0.429s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.134s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.329s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.2s] +Building query seed array... [0.117s] +Computing hash join... [0.099s] +Building seed filter... [0.008s] +Searching alignments... [0.272s] +Deallocating buffers... [0.05s] +Computing alignments... [3.679s] +Deallocating reference... [0.013s] +Loading reference sequences... [0s] +Deallocating buffers... [0.022s] +Deallocating queries... [0.019s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 15.337s +Reported 44982 pairwise alignments, 45503 HSPs. +44982 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/Sr_rh_Nn09_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn09_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Nn13.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Nn13.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.271s] +Masking queries... [0.345s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.137s] +Allocating buffers... [0s] +Loading reference sequences... [1.096s] +Masking reference... [0.719s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.314s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.234s] +Building query seed array... [0.1s] +Computing hash join... [0.119s] +Building seed filter... [0.008s] +Searching alignments... [0.141s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.235s] +Building query seed array... [0.095s] +Computing hash join... [0.098s] +Building seed filter... [0.008s] +Searching alignments... [0.138s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.249s] +Building query seed array... [0.098s] +Computing hash join... [0.096s] +Building seed filter... [0.006s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.232s] +Building query seed array... [0.095s] +Computing hash join... [0.098s] +Building seed filter... [0.006s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.21s] +Building query seed array... [0.093s] +Computing hash join... [0.098s] +Building seed filter... [0.007s] +Searching alignments... [0.129s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.241s] +Building query seed array... [0.101s] +Computing hash join... [0.095s] +Building seed filter... [0.006s] +Searching alignments... [0.129s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.259s] +Building query seed array... [0.109s] +Computing hash join... [0.099s] +Building seed filter... [0.007s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.21s] +Building query seed array... [0.098s] +Computing hash join... [0.102s] +Building seed filter... [0.007s] +Searching alignments... [0.124s] +Deallocating buffers... [0.058s] +Computing alignments... [1.314s] +Deallocating reference... [0.018s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 9.969s +Reported 13752 pairwise alignments, 13820 HSPs. +13752 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.274s] +Masking queries... [0.342s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.147s] +Allocating buffers... [0s] +Loading reference sequences... [0.985s] +Masking reference... [0.639s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.287s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.219s] +Building query seed array... [0.101s] +Computing hash join... [0.099s] +Building seed filter... [0.006s] +Searching alignments... [0.36s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.221s] +Building query seed array... [0.1s] +Computing hash join... [0.09s] +Building seed filter... [0.007s] +Searching alignments... [0.286s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.101s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.244s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.198s] +Building query seed array... [0.091s] +Computing hash join... [0.093s] +Building seed filter... [0.007s] +Searching alignments... [0.312s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.201s] +Building query seed array... [0.089s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.189s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.227s] +Building query seed array... [0.103s] +Computing hash join... [0.091s] +Building seed filter... [0.008s] +Searching alignments... [0.191s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.233s] +Building query seed array... [0.101s] +Computing hash join... [0.089s] +Building seed filter... [0.007s] +Searching alignments... [0.19s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.194s] +Building query seed array... [0.089s] +Computing hash join... [0.091s] +Building seed filter... [0.007s] +Searching alignments... [0.182s] +Deallocating buffers... [0.052s] +Computing alignments... [2.506s] +Deallocating reference... [0.016s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 11.607s +Reported 29317 pairwise alignments, 29537 HSPs. +29317 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/Sr_rh_Nn13_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Nn13_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +Warning: [blastn] Examining 5 or more matches is recommended + + +BLASTing Sr_rh_Atps.200bp.fasta against the rDNA database + + +Binning Sequences from Sr_rh_Atps.200bp.fasta +as rDNA OR Potentially Protein-Coding + + + +Look for beegfs_NorRNAseqs.fasta +in the beegfs Folder + + +Next Script is: 2b_remove_Bact.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/micout.dmnd +Sequences = 1451565 +Letters = 458122721 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.145s] +Masking queries... [0.066s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.033s] +Allocating buffers... [0s] +Loading reference sequences... [1.088s] +Masking reference... [0.689s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.316s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.229s] +Building query seed array... [0.034s] +Computing hash join... [0.059s] +Building seed filter... [0.004s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 1. 
+Building reference seed array... [0.245s] +Building query seed array... [0.03s] +Computing hash join... [0.044s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.259s] +Building query seed array... [0.029s] +Computing hash join... [0.04s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.207s] +Building query seed array... [0.023s] +Computing hash join... [0.042s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.207s] +Building query seed array... [0.022s] +Computing hash join... [0.039s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.24s] +Building query seed array... [0.028s] +Computing hash join... [0.038s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.242s] +Building query seed array... [0.023s] +Computing hash join... [0.038s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.203s] +Building query seed array... [0.02s] +Computing hash join... [0.039s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Deallocating buffers... [0.055s] +Computing alignments... [0.218s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0.002s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... 
[0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 5.342s +Reported 2739 pairwise alignments, 2882 HSPs. +2739 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/BvE +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_BvsE/eukout.dmnd +Sequences = 884771 +Letters = 428734159 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.153s] +Masking queries... [0.058s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.032s] +Allocating buffers... [0s] +Loading reference sequences... [0.988s] +Masking reference... [0.639s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.282s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.221s] +Building query seed array... [0.028s] +Computing hash join... [0.039s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.223s] +Building query seed array... [0.028s] +Computing hash join... [0.034s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.22s] +Building query seed array... [0.026s] +Computing hash join... [0.035s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. 
+Building reference seed array... [0.189s] +Building query seed array... [0.024s] +Computing hash join... [0.034s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.191s] +Building query seed array... [0.027s] +Computing hash join... [0.037s] +Building seed filter... [0.004s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.212s] +Building query seed array... [0.023s] +Computing hash join... [0.034s] +Building seed filter... [0.004s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.226s] +Building query seed array... [0.025s] +Computing hash join... [0.034s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.191s] +Building query seed array... [0.027s] +Computing hash join... [0.035s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Deallocating buffers... [0.052s] +Computing alignments... [0.125s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0.002s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 4.805s +Reported 1289 pairwise alignments, 1382 HSPs. +1289 queries aligned. 
+/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/Sr_rh_Atps_NorRNAseqs.fasta + + +"BLAST"-ing against PROK database using DIAMOND: micout.dmnd + + + + +"BLAST"-ing against EUK database using DIAMOND: eukout.dmnd + + + +Look for Sr_rh_Atps_WTA_EPU.fasta in the beegfs Folder + + +Next Script is: 3_CountOGsDiamond.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.223s] +Masking queries... [0.467s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.145s] +Allocating buffers... [0s] +Loading reference sequences... [1.597s] +Masking reference... [1.189s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.447s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.345s] +Building query seed array... [0.103s] +Computing hash join... [0.14s] +Building seed filter... [0.008s] +Searching alignments... [0.396s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.38s] +Building query seed array... [0.099s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.39s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.375s] +Building query seed array... 
[0.102s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.36s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.323s] +Building query seed array... [0.086s] +Computing hash join... [0.124s] +Building seed filter... [0.009s] +Searching alignments... [0.364s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.319s] +Building query seed array... [0.091s] +Computing hash join... [0.128s] +Building seed filter... [0.011s] +Searching alignments... [0.325s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.374s] +Building query seed array... [0.102s] +Computing hash join... [0.124s] +Building seed filter... [0.008s] +Searching alignments... [0.31s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.379s] +Building query seed array... [0.096s] +Computing hash join... [0.13s] +Building seed filter... [0.009s] +Searching alignments... [0.332s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.086s] +Computing hash join... [0.124s] +Building seed filter... [0.008s] +Searching alignments... [0.312s] +Deallocating buffers... [0.093s] +Computing alignments... [7.27s] +Deallocating reference... [0.034s] +Loading reference sequences... [0s] +Deallocating buffers... [0.013s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 20.043s +Reported 354463 pairwise alignments, 356676 HSPs. +18186 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn02_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.297s] +Masking queries... [0.103s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.046s] +Allocating buffers... [0s] +Loading reference sequences... [1.611s] +Masking reference... [1.323s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.457s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.356s] +Building query seed array... [0.054s] +Computing hash join... [0.119s] +Building seed filter... [0.005s] +Searching alignments... [0.122s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.366s] +Building query seed array... [0.038s] +Computing hash join... [0.088s] +Building seed filter... [0.005s] +Searching alignments... [0.114s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.045s] +Computing hash join... [0.087s] +Building seed filter... [0.006s] +Searching alignments... [0.117s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.32s] +Building query seed array... [0.035s] +Computing hash join... [0.086s] +Building seed filter... [0.005s] +Searching alignments... [0.109s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.314s] +Building query seed array... [0.037s] +Computing hash join... [0.081s] +Building seed filter... [0.005s] +Searching alignments... [0.105s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.357s] +Building query seed array... [0.033s] +Computing hash join... [0.084s] +Building seed filter... [0.005s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.372s] +Building query seed array... [0.038s] +Computing hash join... [0.083s] +Building seed filter... [0.005s] +Searching alignments... [0.1s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.309s] +Building query seed array... [0.036s] +Computing hash join... [0.082s] +Building seed filter... [0.005s] +Searching alignments... [0.097s] +Deallocating buffers... [0.094s] +Computing alignments... [3.364s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.002s] +Deallocating queries... [0.003s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 12.124s +Reported 141326 pairwise alignments, 143069 HSPs. +7105 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Gspa_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.256s] +Masking queries... [0.342s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.132s] +Allocating buffers... [0s] +Loading reference sequences... [1.602s] +Masking reference... [1.062s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.466s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.349s] +Building query seed array... [0.104s] +Computing hash join... [0.142s] +Building seed filter... [0.008s] +Searching alignments... [0.431s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.369s] +Building query seed array... [0.103s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.4s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.382s] +Building query seed array... [0.109s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.369s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.095s] +Computing hash join... [0.13s] +Building seed filter... [0.009s] +Searching alignments... [0.357s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.321s] +Building query seed array... [0.095s] +Computing hash join... [0.127s] +Building seed filter... [0.009s] +Searching alignments... [0.377s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.368s] +Building query seed array... [0.104s] +Computing hash join... [0.125s] +Building seed filter... [0.008s] +Searching alignments... [0.342s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.375s] +Building query seed array... [0.101s] +Computing hash join... [0.125s] +Building seed filter... [0.009s] +Searching alignments... [0.375s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.339s] +Building query seed array... [0.092s] +Computing hash join... [0.125s] +Building seed filter... [0.011s] +Searching alignments... [0.28s] +Deallocating buffers... [0.096s] +Computing alignments... [7.307s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 20.076s +Reported 384072 pairwise alignments, 385929 HSPs. +19812 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn06_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.862s] +Masking queries... [0.491s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.163s] +Allocating buffers... [0s] +Loading reference sequences... [1.588s] +Masking reference... [1.043s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.442s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.343s] +Building query seed array... [0.117s] +Computing hash join... [0.146s] +Building seed filter... [0.009s] +Searching alignments... [0.495s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.107s] +Computing hash join... [0.13s] +Building seed filter... [0.01s] +Searching alignments... [0.448s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.393s] +Building query seed array... [0.123s] +Computing hash join... [0.127s] +Building seed filter... [0.01s] +Searching alignments... [0.399s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.327s] +Building query seed array... [0.109s] +Computing hash join... [0.13s] +Building seed filter... [0.009s] +Searching alignments... [0.403s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.314s] +Building query seed array... [0.1s] +Computing hash join... [0.133s] +Building seed filter... [0.01s] +Searching alignments... [0.386s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.111s] +Computing hash join... [0.121s] +Building seed filter... [0.011s] +Searching alignments... [0.354s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.119s] +Computing hash join... [0.131s] +Building seed filter... [0.011s] +Searching alignments... [0.369s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.099s] +Computing hash join... [0.129s] +Building seed filter... [0.012s] +Searching alignments... [0.33s] +Deallocating buffers... [0.096s] +Computing alignments... [8.467s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.026s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 22.321s +Reported 464168 pairwise alignments, 469292 HSPs. +24278 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv02_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.871s] +Masking queries... [0.256s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.104s] +Allocating buffers... [0s] +Loading reference sequences... [1.572s] +Masking reference... [1.051s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.473s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.348s] +Building query seed array... [0.083s] +Computing hash join... [0.161s] +Building seed filter... [0.008s] +Searching alignments... [0.223s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.093s] +Computing hash join... [0.126s] +Building seed filter... [0.007s] +Searching alignments... [0.208s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.397s] +Building query seed array... [0.077s] +Computing hash join... [0.122s] +Building seed filter... [0.008s] +Searching alignments... [0.206s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.074s] +Computing hash join... [0.123s] +Building seed filter... [0.007s] +Searching alignments... [0.199s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.313s] +Building query seed array... [0.073s] +Computing hash join... [0.122s] +Building seed filter... [0.008s] +Searching alignments... [0.177s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.081s] +Computing hash join... [0.123s] +Building seed filter... [0.008s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.378s] +Building query seed array... [0.082s] +Computing hash join... [0.123s] +Building seed filter... [0.008s] +Searching alignments... [0.177s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.074s] +Computing hash join... [0.123s] +Building seed filter... [0.008s] +Searching alignments... [0.183s] +Deallocating buffers... [0.095s] +Computing alignments... [5.504s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.014s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 16.157s +Reported 220479 pairwise alignments, 220717 HSPs. +10997 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Hind_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.123s] +Masking queries... [0.311s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.119s] +Allocating buffers... [0s] +Loading reference sequences... [1.577s] +Masking reference... [1.058s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.463s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.337s] +Building query seed array... [0.083s] +Computing hash join... [0.19s] +Building seed filter... [0.009s] +Searching alignments... [0.295s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.086s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.279s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.376s] +Building query seed array... [0.089s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.082s] +Computing hash join... [0.13s] +Building seed filter... [0.009s] +Searching alignments... [0.258s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.317s] +Building query seed array... [0.079s] +Computing hash join... [0.132s] +Building seed filter... [0.009s] +Searching alignments... [0.237s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.358s] +Building query seed array... [0.086s] +Computing hash join... [0.132s] +Building seed filter... [0.01s] +Searching alignments... [0.24s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.095s] +Computing hash join... [0.137s] +Building seed filter... [0.01s] +Searching alignments... [0.234s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.077s] +Computing hash join... [0.129s] +Building seed filter... [0.01s] +Searching alignments... [0.226s] +Deallocating buffers... [0.094s] +Computing alignments... [5.707s] +Deallocating reference... [0.021s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.012s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 17.227s +Reported 312594 pairwise alignments, 314266 HSPs. +16742 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn11_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.782s] +Masking queries... [0.507s] +Building query seed set... [0.052s] +Algorithm: Double-indexed +Building query histograms... [0.147s] +Allocating buffers... [0s] +Loading reference sequences... [1.609s] +Masking reference... [1.153s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.451s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.348s] +Building query seed array... [0.105s] +Computing hash join... [0.188s] +Building seed filter... [0.01s] +Searching alignments... [0.432s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.374s] +Building query seed array... [0.105s] +Computing hash join... [0.125s] +Building seed filter... [0.01s] +Searching alignments... [0.387s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.376s] +Building query seed array... [0.113s] +Computing hash join... [0.13s] +Building seed filter... [0.012s] +Searching alignments... [0.375s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.099s] +Computing hash join... [0.128s] +Building seed filter... [0.011s] +Searching alignments... [0.38s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.317s] +Building query seed array... [0.092s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.357s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.105s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.342s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.377s] +Building query seed array... [0.113s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.354s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.092s] +Computing hash join... [0.126s] +Building seed filter... [0.009s] +Searching alignments... [0.333s] +Deallocating buffers... [0.093s] +Computing alignments... [8.04s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.025s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.697s +Reported 442574 pairwise alignments, 448696 HSPs. +23147 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv03_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.595s] +Masking queries... [0.192s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.071s] +Allocating buffers... [0s] +Loading reference sequences... [1.591s] +Masking reference... [1.117s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.475s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.355s] +Building query seed array... [0.062s] +Computing hash join... [0.156s] +Building seed filter... [0.006s] +Searching alignments... [0.175s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.06s] +Computing hash join... [0.126s] +Building seed filter... [0.007s] +Searching alignments... [0.169s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.059s] +Computing hash join... [0.123s] +Building seed filter... [0.006s] +Searching alignments... [0.157s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.051s] +Computing hash join... [0.121s] +Building seed filter... [0.007s] +Searching alignments... [0.156s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.325s] +Building query seed array... [0.053s] +Computing hash join... [0.111s] +Building seed filter... [0.007s] +Searching alignments... [0.14s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.359s] +Building query seed array... [0.055s] +Computing hash join... [0.111s] +Building seed filter... [0.007s] +Searching alignments... [0.145s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.387s] +Building query seed array... [0.062s] +Computing hash join... [0.116s] +Building seed filter... [0.007s] +Searching alignments... [0.143s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.311s] +Building query seed array... [0.052s] +Computing hash join... [0.118s] +Building seed filter... [0.007s] +Searching alignments... [0.137s] +Deallocating buffers... [0.094s] +Computing alignments... [3.907s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.011s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.69s +Reported 174844 pairwise alignments, 175037 HSPs. +9075 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Gsp1_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.979s] +Masking queries... [0.266s] +Building query seed set... [0.044s] +Algorithm: Double-indexed +Building query histograms... [0.09s] +Allocating buffers... [0s] +Loading reference sequences... [1.6s] +Masking reference... [1.105s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.466s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.334s] +Building query seed array... [0.072s] +Computing hash join... [0.199s] +Building seed filter... [0.012s] +Searching alignments... [0.239s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.373s] +Building query seed array... [0.067s] +Computing hash join... [0.136s] +Building seed filter... [0.006s] +Searching alignments... [0.217s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.398s] +Building query seed array... [0.067s] +Computing hash join... [0.129s] +Building seed filter... [0.006s] +Searching alignments... [0.203s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.318s] +Building query seed array... [0.061s] +Computing hash join... [0.127s] +Building seed filter... [0.006s] +Searching alignments... [0.218s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.057s] +Computing hash join... [0.124s] +Building seed filter... [0.006s] +Searching alignments... [0.199s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.368s] +Building query seed array... [0.067s] +Computing hash join... [0.125s] +Building seed filter... [0.006s] +Searching alignments... [0.186s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.381s] +Building query seed array... [0.067s] +Computing hash join... [0.121s] +Building seed filter... [0.006s] +Searching alignments... [0.192s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.053s] +Computing hash join... [0.123s] +Building seed filter... [0.006s] +Searching alignments... [0.183s] +Deallocating buffers... [0.094s] +Computing alignments... [4.041s] +Deallocating reference... [0.031s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.009s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 14.867s +Reported 250746 pairwise alignments, 252735 HSPs. +13330 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv06_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.044s] +Masking queries... [0.068s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [1.589s] +Masking reference... [1.063s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.447s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.352s] +Building query seed array... [0.009s] +Computing hash join... [0.079s] +Building seed filter... [0.005s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.356s] +Building query seed array... [0.007s] +Computing hash join... [0.075s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.37s] +Building query seed array... [0.007s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.009s] +Computing hash join... [0.077s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.316s] +Building query seed array... [0.01s] +Computing hash join... [0.079s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.009s] +Computing hash join... [0.076s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.373s] +Building query seed array... [0.009s] +Computing hash join... [0.079s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.323s] +Building query seed array... [0.014s] +Computing hash join... [0.081s] +Building seed filter... [0.004s] +Searching alignments... [0.035s] +Deallocating buffers... [0.098s] +Computing alignments... [0.344s] +Deallocating reference... [0.03s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.513s +Reported 25154 pairwise alignments, 25213 HSPs. +1224 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Gl01_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.713s] +Masking queries... [0.209s] +Building query seed set... [0.043s] +Algorithm: Double-indexed +Building query histograms... [0.075s] +Allocating buffers... [0s] +Loading reference sequences... [1.588s] +Masking reference... [1.044s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.447s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.061s] +Computing hash join... [0.15s] +Building seed filter... [0.008s] +Searching alignments... [0.204s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.056s] +Computing hash join... [0.122s] +Building seed filter... [0.008s] +Searching alignments... [0.187s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.383s] +Building query seed array... [0.062s] +Computing hash join... [0.113s] +Building seed filter... [0.007s] +Searching alignments... [0.175s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.053s] +Computing hash join... [0.111s] +Building seed filter... [0.008s] +Searching alignments... [0.181s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.324s] +Building query seed array... [0.056s] +Computing hash join... [0.107s] +Building seed filter... [0.007s] +Searching alignments... [0.169s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.06s] +Computing hash join... [0.11s] +Building seed filter... [0.007s] +Searching alignments... [0.159s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.381s] +Building query seed array... [0.056s] +Computing hash join... [0.107s] +Building seed filter... [0.007s] +Searching alignments... [0.164s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.049s] +Computing hash join... [0.105s] +Building seed filter... [0.007s] +Searching alignments... [0.154s] +Deallocating buffers... [0.095s] +Computing alignments... [3.829s] +Deallocating reference... [0.029s] +Loading reference sequences... [0s] +Deallocating buffers... [0.006s] +Deallocating queries... [0.013s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.753s +Reported 211705 pairwise alignments, 212271 HSPs. +11092 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Esca_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.785s] +Masking queries... [0.213s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.086s] +Allocating buffers... [0s] +Loading reference sequences... [1.587s] +Masking reference... [1.104s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.447s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.339s] +Building query seed array... [0.057s] +Computing hash join... [0.175s] +Building seed filter... [0.007s] +Searching alignments... [0.205s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.369s] +Building query seed array... [0.062s] +Computing hash join... [0.141s] +Building seed filter... [0.009s] +Searching alignments... [0.187s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.382s] +Building query seed array... [0.062s] +Computing hash join... [0.137s] +Building seed filter... [0.008s] +Searching alignments... [0.175s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.058s] +Computing hash join... [0.129s] +Building seed filter... [0.007s] +Searching alignments... [0.175s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.312s] +Building query seed array... [0.059s] +Computing hash join... [0.135s] +Building seed filter... [0.007s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.358s] +Building query seed array... [0.07s] +Computing hash join... [0.132s] +Building seed filter... [0.007s] +Searching alignments... [0.178s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.379s] +Building query seed array... [0.07s] +Computing hash join... [0.128s] +Building seed filter... [0.008s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.318s] +Building query seed array... [0.054s] +Computing hash join... [0.127s] +Building seed filter... [0.007s] +Searching alignments... [0.155s] +Deallocating buffers... [0.097s] +Computing alignments... [4.221s] +Deallocating reference... [0.031s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.01s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 14.503s +Reported 205337 pairwise alignments, 206188 HSPs. +10532 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Calb_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.728s] +Masking queries... [0.252s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.088s] +Allocating buffers... [0s] +Loading reference sequences... [1.582s] +Masking reference... [1.099s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.488s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.082s] +Computing hash join... [0.19s] +Building seed filter... [0.005s] +Searching alignments... [0.184s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.066s] +Computing hash join... [0.148s] +Building seed filter... [0.006s] +Searching alignments... [0.177s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.381s] +Building query seed array... [0.071s] +Computing hash join... [0.143s] +Building seed filter... [0.006s] +Searching alignments... [0.171s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.067s] +Computing hash join... [0.142s] +Building seed filter... [0.006s] +Searching alignments... [0.169s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.319s] +Building query seed array... [0.061s] +Computing hash join... [0.137s] +Building seed filter... [0.008s] +Searching alignments... [0.159s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.082s] +Computing hash join... [0.14s] +Building seed filter... [0.008s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.081s] +Computing hash join... [0.139s] +Building seed filter... [0.008s] +Searching alignments... [0.154s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.064s] +Computing hash join... [0.137s] +Building seed filter... [0.008s] +Searching alignments... [0.152s] +Deallocating buffers... [0.094s] +Computing alignments... [3.98s] +Deallocating reference... [0.034s] +Loading reference sequences... [0s] +Deallocating buffers... [0.009s] +Deallocating queries... [0.01s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 14.357s +Reported 186068 pairwise alignments, 186401 HSPs. +9237 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Emac_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.138s] +Masking queries... [0.3s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.114s] +Allocating buffers... [0s] +Loading reference sequences... [1.623s] +Masking reference... [1.064s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.476s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.352s] +Building query seed array... [0.082s] +Computing hash join... [0.153s] +Building seed filter... [0.009s] +Searching alignments... [0.389s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.364s] +Building query seed array... [0.091s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.289s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.089s] +Computing hash join... [0.131s] +Building seed filter... [0.011s] +Searching alignments... [0.326s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.323s] +Building query seed array... [0.083s] +Computing hash join... [0.134s] +Building seed filter... [0.01s] +Searching alignments... [0.317s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.081s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.251s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.088s] +Computing hash join... [0.13s] +Building seed filter... [0.008s] +Searching alignments... [0.257s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.376s] +Building query seed array... [0.091s] +Computing hash join... [0.13s] +Building seed filter... [0.01s] +Searching alignments... [0.328s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.324s] +Building query seed array... [0.086s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.238s] +Deallocating buffers... [0.095s] +Computing alignments... [6.416s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.013s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 18.401s +Reported 332050 pairwise alignments, 333483 HSPs. +17028 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn05_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.339s] +Masking queries... [0.361s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.165s] +Allocating buffers... [0s] +Loading reference sequences... [1.602s] +Masking reference... [1.146s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.453s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.336s] +Building query seed array... [0.121s] +Computing hash join... [0.15s] +Building seed filter... [0.012s] +Searching alignments... [0.506s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.109s] +Computing hash join... [0.128s] +Building seed filter... [0.012s] +Searching alignments... [0.461s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.106s] +Computing hash join... [0.129s] +Building seed filter... [0.01s] +Searching alignments... [0.419s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.098s] +Computing hash join... [0.134s] +Building seed filter... [0.011s] +Searching alignments... [0.4s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.103s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.389s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.107s] +Computing hash join... [0.131s] +Building seed filter... [0.01s] +Searching alignments... [0.354s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.387s] +Building query seed array... [0.11s] +Computing hash join... [0.132s] +Building seed filter... [0.01s] +Searching alignments... [0.398s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.098s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.304s] +Deallocating buffers... [0.096s] +Computing alignments... [8.481s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.017s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.804s +Reported 396857 pairwise alignments, 399100 HSPs. +20167 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn10_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.203s] +Masking queries... [0.377s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.12s] +Allocating buffers... [0s] +Loading reference sequences... [1.61s] +Masking reference... [1.157s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.439s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.099s] +Computing hash join... [0.152s] +Building seed filter... [0.011s] +Searching alignments... [0.38s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.364s] +Building query seed array... [0.093s] +Computing hash join... [0.128s] +Building seed filter... [0.01s] +Searching alignments... [0.355s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.386s] +Building query seed array... [0.105s] +Computing hash join... [0.131s] +Building seed filter... [0.011s] +Searching alignments... [0.323s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.081s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.338s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.32s] +Building query seed array... [0.087s] +Computing hash join... [0.127s] +Building seed filter... [0.01s] +Searching alignments... [0.307s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.092s] +Computing hash join... [0.127s] +Building seed filter... [0.008s] +Searching alignments... [0.29s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.381s] +Building query seed array... [0.1s] +Computing hash join... [0.129s] +Building seed filter... [0.008s] +Searching alignments... [0.337s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.089s] +Computing hash join... [0.124s] +Building seed filter... [0.008s] +Searching alignments... [0.287s] +Deallocating buffers... [0.093s] +Computing alignments... [7.075s] +Deallocating reference... [0.039s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 19.506s +Reported 345493 pairwise alignments, 348050 HSPs. +17953 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn03_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.188s] +Masking queries... [0.38s] +Building query seed set... [0.065s] +Algorithm: Double-indexed +Building query histograms... [0.151s] +Allocating buffers... [0s] +Loading reference sequences... [1.613s] +Masking reference... [1.123s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.474s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.342s] +Building query seed array... [0.102s] +Computing hash join... [0.143s] +Building seed filter... [0.008s] +Searching alignments... [0.298s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.104s] +Computing hash join... [0.122s] +Building seed filter... [0.007s] +Searching alignments... [0.312s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.105s] +Computing hash join... [0.123s] +Building seed filter... [0.007s] +Searching alignments... [0.272s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.095s] +Computing hash join... [0.128s] +Building seed filter... [0.007s] +Searching alignments... [0.279s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.326s] +Building query seed array... [0.095s] +Computing hash join... [0.122s] +Building seed filter... [0.006s] +Searching alignments... [0.241s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.359s] +Building query seed array... [0.099s] +Computing hash join... [0.122s] +Building seed filter... [0.006s] +Searching alignments... [0.244s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.375s] +Building query seed array... [0.11s] +Computing hash join... [0.126s] +Building seed filter... [0.008s] +Searching alignments... [0.26s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.325s] +Building query seed array... [0.096s] +Computing hash join... [0.124s] +Building seed filter... [0.006s] +Searching alignments... [0.232s] +Deallocating buffers... [0.094s] +Computing alignments... [7.456s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 19.458s +Reported 300277 pairwise alignments, 300581 HSPs. +14869 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Halb_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.083s] +Masking queries... [0.305s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.115s] +Allocating buffers... [0s] +Loading reference sequences... [1.605s] +Masking reference... [1.057s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.494s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.094s] +Computing hash join... [0.164s] +Building seed filter... [0.008s] +Searching alignments... [0.39s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.079s] +Computing hash join... [0.133s] +Building seed filter... [0.009s] +Searching alignments... [0.362s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.377s] +Building query seed array... [0.082s] +Computing hash join... [0.135s] +Building seed filter... [0.009s] +Searching alignments... [0.325s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.077s] +Computing hash join... [0.134s] +Building seed filter... [0.008s] +Searching alignments... [0.329s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.326s] +Building query seed array... [0.073s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.333s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.38s] +Building query seed array... [0.082s] +Computing hash join... [0.131s] +Building seed filter... [0.008s] +Searching alignments... [0.28s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.381s] +Building query seed array... [0.094s] +Computing hash join... [0.135s] +Building seed filter... [0.008s] +Searching alignments... [0.346s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.072s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.294s] +Deallocating buffers... [0.094s] +Computing alignments... [5.809s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.012s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 17.987s +Reported 309767 pairwise alignments, 311870 HSPs. +15500 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn08_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.412s] +Masking queries... [0.368s] +Building query seed set... [0.056s] +Algorithm: Double-indexed +Building query histograms... [0.131s] +Allocating buffers... [0s] +Loading reference sequences... [1.596s] +Masking reference... [1.06s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.463s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.339s] +Building query seed array... [0.107s] +Computing hash join... [0.139s] +Building seed filter... [0.007s] +Searching alignments... [0.383s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.107s] +Computing hash join... [0.112s] +Building seed filter... [0.009s] +Searching alignments... [0.346s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.378s] +Building query seed array... [0.093s] +Computing hash join... [0.118s] +Building seed filter... [0.009s] +Searching alignments... [0.316s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.083s] +Computing hash join... [0.109s] +Building seed filter... [0.009s] +Searching alignments... [0.318s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.321s] +Building query seed array... [0.09s] +Computing hash join... [0.114s] +Building seed filter... [0.007s] +Searching alignments... [0.29s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.361s] +Building query seed array... [0.098s] +Computing hash join... [0.111s] +Building seed filter... [0.007s] +Searching alignments... [0.28s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.385s] +Building query seed array... [0.102s] +Computing hash join... [0.11s] +Building seed filter... [0.007s] +Searching alignments... [0.296s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.313s] +Building query seed array... [0.084s] +Computing hash join... [0.112s] +Building seed filter... [0.008s] +Searching alignments... [0.265s] +Deallocating buffers... [0.094s] +Computing alignments... [9.117s] +Deallocating reference... [0.034s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.016s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.434s +Reported 403699 pairwise alignments, 404113 HSPs. +19985 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Tx01_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.575s] +Masking queries... [0.392s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.149s] +Allocating buffers... [0s] +Loading reference sequences... [1.587s] +Masking reference... [1.043s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.486s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.346s] +Building query seed array... [0.12s] +Computing hash join... [0.15s] +Building seed filter... [0.014s] +Searching alignments... [0.527s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.37s] +Building query seed array... [0.115s] +Computing hash join... [0.135s] +Building seed filter... [0.013s] +Searching alignments... [0.479s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.38s] +Building query seed array... [0.12s] +Computing hash join... [0.134s] +Building seed filter... [0.012s] +Searching alignments... [0.431s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.331s] +Building query seed array... [0.101s] +Computing hash join... [0.128s] +Building seed filter... [0.014s] +Searching alignments... [0.417s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.317s] +Building query seed array... [0.107s] +Computing hash join... [0.131s] +Building seed filter... [0.01s] +Searching alignments... [0.406s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.364s] +Building query seed array... [0.114s] +Computing hash join... [0.134s] +Building seed filter... [0.01s] +Searching alignments... [0.365s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.383s] +Building query seed array... [0.123s] +Computing hash join... [0.132s] +Building seed filter... [0.01s] +Searching alignments... [0.425s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.318s] +Building query seed array... [0.106s] +Computing hash join... [0.135s] +Building seed filter... [0.011s] +Searching alignments... [0.36s] +Deallocating buffers... [0.092s] +Computing alignments... [8.807s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.017s] +Deallocating queries... [0.021s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 22.588s +Reported 468908 pairwise alignments, 470976 HSPs. +23977 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn07_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.342s] +Masking queries... [0.383s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.146s] +Allocating buffers... [0s] +Loading reference sequences... [1.591s] +Masking reference... [1.057s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.44s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.343s] +Building query seed array... [0.115s] +Computing hash join... [0.144s] +Building seed filter... [0.009s] +Searching alignments... [0.49s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.102s] +Computing hash join... [0.132s] +Building seed filter... [0.009s] +Searching alignments... [0.489s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.381s] +Building query seed array... [0.113s] +Computing hash join... [0.135s] +Building seed filter... [0.01s] +Searching alignments... [0.43s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.321s] +Building query seed array... [0.096s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.411s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.311s] +Building query seed array... [0.094s] +Computing hash join... [0.129s] +Building seed filter... [0.008s] +Searching alignments... [0.403s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.361s] +Building query seed array... [0.114s] +Computing hash join... [0.128s] +Building seed filter... [0.009s] +Searching alignments... [0.345s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.378s] +Building query seed array... [0.113s] +Computing hash join... [0.128s] +Building seed filter... [0.008s] +Searching alignments... [0.421s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.099s] +Computing hash join... [0.131s] +Building seed filter... [0.009s] +Searching alignments... [0.315s] +Deallocating buffers... [0.094s] +Computing alignments... [8.498s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.021s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.772s +Reported 440387 pairwise alignments, 443233 HSPs. +22715 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn01_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.925s] +Masking queries... [0.552s] +Building query seed set... [0.049s] +Algorithm: Double-indexed +Building query histograms... [0.186s] +Allocating buffers... [0s] +Loading reference sequences... [1.614s] +Masking reference... [1.056s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.463s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.343s] +Building query seed array... [0.118s] +Computing hash join... [0.143s] +Building seed filter... [0.009s] +Searching alignments... [0.433s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.113s] +Computing hash join... [0.129s] +Building seed filter... [0.011s] +Searching alignments... [0.382s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.391s] +Building query seed array... [0.118s] +Computing hash join... [0.134s] +Building seed filter... [0.009s] +Searching alignments... [0.363s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.102s] +Computing hash join... [0.131s] +Building seed filter... [0.011s] +Searching alignments... [0.36s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.317s] +Building query seed array... [0.1s] +Computing hash join... [0.128s] +Building seed filter... [0.01s] +Searching alignments... [0.352s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.369s] +Building query seed array... [0.116s] +Computing hash join... [0.126s] +Building seed filter... [0.01s] +Searching alignments... [0.341s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.377s] +Building query seed array... [0.116s] +Computing hash join... [0.134s] +Building seed filter... [0.01s] +Searching alignments... [0.347s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.101s] +Computing hash join... [0.13s] +Building seed filter... [0.011s] +Searching alignments... [0.317s] +Deallocating buffers... [0.092s] +Computing alignments... [7.998s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.028s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.772s +Reported 405956 pairwise alignments, 409986 HSPs. +21474 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv04_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.92s] +Masking queries... [0.256s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.103s] +Allocating buffers... [0s] +Loading reference sequences... [1.588s] +Masking reference... [1.071s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.443s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.345s] +Building query seed array... [0.076s] +Computing hash join... [0.177s] +Building seed filter... [0.008s] +Searching alignments... [0.377s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.07s] +Computing hash join... [0.134s] +Building seed filter... [0.009s] +Searching alignments... [0.331s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.086s] +Computing hash join... [0.135s] +Building seed filter... [0.01s] +Searching alignments... [0.342s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.322s] +Building query seed array... [0.074s] +Computing hash join... [0.132s] +Building seed filter... [0.009s] +Searching alignments... [0.342s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.319s] +Building query seed array... [0.073s] +Computing hash join... [0.135s] +Building seed filter... [0.008s] +Searching alignments... [0.334s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.08s] +Computing hash join... [0.133s] +Building seed filter... [0.008s] +Searching alignments... [0.282s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.385s] +Building query seed array... [0.087s] +Computing hash join... [0.139s] +Building seed filter... [0.008s] +Searching alignments... [0.267s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.071s] +Computing hash join... [0.135s] +Building seed filter... [0.009s] +Searching alignments... [0.29s] +Deallocating buffers... [0.093s] +Computing alignments... [7.134s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.011s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 18.906s +Reported 537338 pairwise alignments, 538184 HSPs. +28139 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Usac_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.187s] +Masking queries... [0.538s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.206s] +Allocating buffers... [0s] +Loading reference sequences... [1.604s] +Masking reference... [1.049s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.445s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.356s] +Building query seed array... [0.154s] +Computing hash join... [0.154s] +Building seed filter... [0.011s] +Searching alignments... [0.751s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.364s] +Building query seed array... [0.147s] +Computing hash join... [0.136s] +Building seed filter... [0.01s] +Searching alignments... [0.686s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.385s] +Building query seed array... [0.153s] +Computing hash join... [0.144s] +Building seed filter... [0.01s] +Searching alignments... [0.603s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.313s] +Building query seed array... [0.141s] +Computing hash join... [0.15s] +Building seed filter... [0.011s] +Searching alignments... [0.608s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.323s] +Building query seed array... [0.136s] +Computing hash join... [0.144s] +Building seed filter... [0.012s] +Searching alignments... [0.609s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.147s] +Computing hash join... [0.141s] +Building seed filter... [0.012s] +Searching alignments... [0.534s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.382s] +Building query seed array... [0.161s] +Computing hash join... [0.142s] +Building seed filter... [0.011s] +Searching alignments... [0.625s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.133s] +Computing hash join... [0.14s] +Building seed filter... [0.01s] +Searching alignments... [0.512s] +Deallocating buffers... [0.091s] +Computing alignments... [12.551s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.024s] +Deallocating queries... [0.028s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 28.989s +Reported 725553 pairwise alignments, 730001 HSPs. +36674 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn04_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.543s] +Masking queries... [0.171s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.07s] +Allocating buffers... [0s] +Loading reference sequences... [1.594s] +Masking reference... [1.056s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.483s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.353s] +Building query seed array... [0.07s] +Computing hash join... [0.157s] +Building seed filter... [0.005s] +Searching alignments... [0.158s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.36s] +Building query seed array... [0.05s] +Computing hash join... [0.126s] +Building seed filter... [0.006s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.383s] +Building query seed array... [0.059s] +Computing hash join... [0.118s] +Building seed filter... [0.008s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.054s] +Computing hash join... [0.115s] +Building seed filter... [0.007s] +Searching alignments... [0.143s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.318s] +Building query seed array... [0.05s] +Computing hash join... [0.108s] +Building seed filter... [0.006s] +Searching alignments... [0.128s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.057s] +Computing hash join... [0.117s] +Building seed filter... [0.007s] +Searching alignments... [0.141s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.378s] +Building query seed array... [0.061s] +Computing hash join... [0.11s] +Building seed filter... [0.006s] +Searching alignments... [0.131s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.326s] +Building query seed array... [0.057s] +Computing hash join... [0.112s] +Building seed filter... [0.006s] +Searching alignments... [0.126s] +Deallocating buffers... [0.095s] +Computing alignments... [3.851s] +Deallocating reference... [0.025s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.008s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 13.385s +Reported 158063 pairwise alignments, 159384 HSPs. +7943 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Emar_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.032s] +Masking queries... [0.268s] +Building query seed set... [0.04s] +Algorithm: Double-indexed +Building query histograms... [0.088s] +Allocating buffers... [0s] +Loading reference sequences... [1.609s] +Masking reference... [1.222s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.44s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.074s] +Computing hash join... [0.172s] +Building seed filter... [0.008s] +Searching alignments... [0.338s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.361s] +Building query seed array... [0.068s] +Computing hash join... [0.149s] +Building seed filter... [0.007s] +Searching alignments... [0.291s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.379s] +Building query seed array... [0.064s] +Computing hash join... [0.136s] +Building seed filter... [0.007s] +Searching alignments... [0.279s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.379s] +Building query seed array... [0.064s] +Computing hash join... [0.136s] +Building seed filter... [0.007s] +Searching alignments... [0.269s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.316s] +Building query seed array... [0.056s] +Computing hash join... [0.133s] +Building seed filter... [0.006s] +Searching alignments... [0.239s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.364s] +Building query seed array... [0.065s] +Computing hash join... [0.128s] +Building seed filter... [0.007s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.068s] +Computing hash join... [0.134s] +Building seed filter... [0.007s] +Searching alignments... [0.252s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.054s] +Computing hash join... [0.131s] +Building seed filter... [0.007s] +Searching alignments... [0.232s] +Deallocating buffers... [0.093s] +Computing alignments... [5.993s] +Deallocating reference... [0.035s] +Loading reference sequences... [0s] +Deallocating buffers... [0.008s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 17.572s +Reported 461440 pairwise alignments, 462438 HSPs. +23164 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Rsp1_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.744s] +Masking queries... [0.211s] +Building query seed set... [0.041s] +Algorithm: Double-indexed +Building query histograms... [0.079s] +Allocating buffers... [0s] +Loading reference sequences... [1.602s] +Masking reference... [1.042s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.478s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.348s] +Building query seed array... [0.057s] +Computing hash join... [0.167s] +Building seed filter... [0.007s] +Searching alignments... [0.237s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.367s] +Building query seed array... [0.062s] +Computing hash join... [0.137s] +Building seed filter... [0.006s] +Searching alignments... [0.222s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.383s] +Building query seed array... [0.064s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.214s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.057s] +Computing hash join... [0.132s] +Building seed filter... [0.007s] +Searching alignments... [0.204s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.316s] +Building query seed array... [0.054s] +Computing hash join... [0.127s] +Building seed filter... [0.006s] +Searching alignments... [0.187s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.059s] +Computing hash join... [0.129s] +Building seed filter... [0.006s] +Searching alignments... [0.19s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.063s] +Computing hash join... [0.128s] +Building seed filter... [0.008s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.322s] +Building query seed array... [0.059s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.181s] +Deallocating buffers... [0.092s] +Computing alignments... [4.925s] +Deallocating reference... [0.032s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.011s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 15.334s +Reported 287405 pairwise alignments, 289878 HSPs. +15066 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Sspa_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.639s] +Masking queries... [0.176s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.083s] +Allocating buffers... [0s] +Loading reference sequences... [1.592s] +Masking reference... [1.04s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.482s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.344s] +Building query seed array... [0.07s] +Computing hash join... [0.161s] +Building seed filter... [0.005s] +Searching alignments... [0.179s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.371s] +Building query seed array... [0.053s] +Computing hash join... [0.117s] +Building seed filter... [0.007s] +Searching alignments... [0.169s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.388s] +Building query seed array... [0.062s] +Computing hash join... [0.11s] +Building seed filter... [0.006s] +Searching alignments... [0.16s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.051s] +Computing hash join... [0.116s] +Building seed filter... [0.006s] +Searching alignments... [0.162s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.321s] +Building query seed array... [0.051s] +Computing hash join... [0.105s] +Building seed filter... [0.006s] +Searching alignments... [0.147s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.056s] +Computing hash join... [0.107s] +Building seed filter... [0.007s] +Searching alignments... [0.146s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.375s] +Building query seed array... [0.059s] +Computing hash join... [0.105s] +Building seed filter... [0.007s] +Searching alignments... [0.153s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.047s] +Computing hash join... [0.106s] +Building seed filter... [0.007s] +Searching alignments... [0.142s] +Deallocating buffers... [0.094s] +Computing alignments... [4.475s] +Deallocating reference... [0.023s] +Loading reference sequences... [0s] +Deallocating buffers... [0.007s] +Deallocating queries... [0.008s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 14.192s +Reported 208347 pairwise alignments, 208875 HSPs. +10555 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Hhir_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.196s] +Masking queries... [0.331s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.12s] +Allocating buffers... [0s] +Loading reference sequences... [1.595s] +Masking reference... [1.25s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.458s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.341s] +Building query seed array... [0.1s] +Computing hash join... [0.146s] +Building seed filter... [0.011s] +Searching alignments... [0.309s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.365s] +Building query seed array... [0.104s] +Computing hash join... [0.137s] +Building seed filter... [0.012s] +Searching alignments... [0.298s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.377s] +Building query seed array... [0.099s] +Computing hash join... [0.128s] +Building seed filter... [0.01s] +Searching alignments... [0.287s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.32s] +Building query seed array... [0.088s] +Computing hash join... [0.134s] +Building seed filter... [0.009s] +Searching alignments... [0.274s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.316s] +Building query seed array... [0.088s] +Computing hash join... [0.126s] +Building seed filter... [0.009s] +Searching alignments... [0.249s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.1s] +Computing hash join... [0.129s] +Building seed filter... [0.011s] +Searching alignments... [0.263s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.38s] +Building query seed array... [0.094s] +Computing hash join... [0.128s] +Building seed filter... [0.01s] +Searching alignments... [0.252s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.324s] +Building query seed array... [0.092s] +Computing hash join... [0.129s] +Building seed filter... [0.01s] +Searching alignments... [0.244s] +Deallocating buffers... [0.097s] +Computing alignments... [6.423s] +Deallocating reference... [0.041s] +Loading reference sequences... [0s] +Deallocating buffers... [0.013s] +Deallocating queries... [0.015s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 18.487s +Reported 329599 pairwise alignments, 331231 HSPs. +17423 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn12_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.186s] +Masking queries... [0.323s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.122s] +Allocating buffers... [0s] +Loading reference sequences... [1.602s] +Masking reference... [1.068s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.484s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.344s] +Building query seed array... [0.089s] +Computing hash join... [0.151s] +Building seed filter... [0.01s] +Searching alignments... [0.392s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.087s] +Computing hash join... [0.126s] +Building seed filter... [0.011s] +Searching alignments... [0.275s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.382s] +Building query seed array... [0.09s] +Computing hash join... [0.132s] +Building seed filter... [0.01s] +Searching alignments... [0.271s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.322s] +Building query seed array... [0.087s] +Computing hash join... [0.128s] +Building seed filter... [0.011s] +Searching alignments... [0.261s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.081s] +Computing hash join... [0.126s] +Building seed filter... [0.009s] +Searching alignments... [0.235s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.092s] +Computing hash join... [0.124s] +Building seed filter... [0.008s] +Searching alignments... [0.253s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.376s] +Building query seed array... [0.092s] +Computing hash join... [0.127s] +Building seed filter... [0.008s] +Searching alignments... [0.243s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.321s] +Building query seed array... [0.107s] +Computing hash join... [0.125s] +Building seed filter... [0.011s] +Searching alignments... [0.225s] +Deallocating buffers... [0.099s] +Computing alignments... [6.156s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.014s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 17.975s +Reported 330365 pairwise alignments, 332323 HSPs. +17441 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn14_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [2.193s] +Masking queries... [0.553s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.214s] +Allocating buffers... [0s] +Loading reference sequences... [1.618s] +Masking reference... [1.063s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.459s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.359s] +Building query seed array... [0.158s] +Computing hash join... [0.155s] +Building seed filter... [0.01s] +Searching alignments... [0.505s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.361s] +Building query seed array... [0.15s] +Computing hash join... [0.144s] +Building seed filter... [0.011s] +Searching alignments... [0.476s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.382s] +Building query seed array... [0.151s] +Computing hash join... [0.148s] +Building seed filter... [0.01s] +Searching alignments... [0.448s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.13s] +Computing hash join... [0.142s] +Building seed filter... [0.011s] +Searching alignments... [0.445s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.32s] +Building query seed array... [0.133s] +Computing hash join... [0.145s] +Building seed filter... [0.011s] +Searching alignments... [0.394s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.366s] +Building query seed array... [0.15s] +Computing hash join... [0.146s] +Building seed filter... [0.012s] +Searching alignments... [0.422s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.402s] +Building query seed array... [0.16s] +Computing hash join... [0.141s] +Building seed filter... [0.01s] +Searching alignments... [0.4s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.313s] +Building query seed array... [0.136s] +Computing hash join... [0.141s] +Building seed filter... [0.012s] +Searching alignments... [0.386s] +Deallocating buffers... [0.097s] +Computing alignments... [12.415s] +Deallocating reference... [0.038s] +Loading reference sequences... [0s] +Deallocating buffers... [0.021s] +Deallocating queries... [0.029s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 27.503s +Reported 604291 pairwise alignments, 606168 HSPs. +31505 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Gl02_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.803s] +Masking queries... [0.507s] +Building query seed set... [0.05s] +Algorithm: Double-indexed +Building query histograms... [0.156s] +Allocating buffers... [0s] +Loading reference sequences... [1.612s] +Masking reference... [1.068s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.449s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.344s] +Building query seed array... [0.101s] +Computing hash join... [0.247s] +Building seed filter... [0.01s] +Searching alignments... [0.398s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.371s] +Building query seed array... [0.11s] +Computing hash join... [0.124s] +Building seed filter... [0.009s] +Searching alignments... [0.389s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.396s] +Building query seed array... [0.112s] +Computing hash join... [0.133s] +Building seed filter... [0.01s] +Searching alignments... [0.365s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.315s] +Building query seed array... [0.098s] +Computing hash join... [0.13s] +Building seed filter... [0.01s] +Searching alignments... [0.363s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.102s] +Computing hash join... [0.132s] +Building seed filter... [0.01s] +Searching alignments... [0.341s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.371s] +Building query seed array... [0.11s] +Computing hash join... [0.13s] +Building seed filter... [0.01s] +Searching alignments... [0.321s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.378s] +Building query seed array... [0.112s] +Computing hash join... [0.13s] +Building seed filter... [0.01s] +Searching alignments... [0.35s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.32s] +Building query seed array... [0.1s] +Computing hash join... [0.127s] +Building seed filter... [0.01s] +Searching alignments... [0.311s] +Deallocating buffers... [0.093s] +Computing alignments... [7.788s] +Deallocating reference... [0.037s] +Loading reference sequences... [0s] +Deallocating buffers... [0.015s] +Deallocating queries... [0.024s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 21.37s +Reported 412048 pairwise alignments, 415577 HSPs. +21731 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv05_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.542s] +Masking queries... [0.379s] +Building query seed set... [0.049s] +Algorithm: Double-indexed +Building query histograms... [0.123s] +Allocating buffers... [0s] +Loading reference sequences... [1.582s] +Masking reference... [1.08s] +Initializing temporary storage... [0.013s] +Building reference histograms... [0.476s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.355s] +Building query seed array... [0.099s] +Computing hash join... [0.177s] +Building seed filter... [0.011s] +Searching alignments... [0.378s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.087s] +Computing hash join... [0.13s] +Building seed filter... [0.009s] +Searching alignments... [0.354s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.38s] +Building query seed array... [0.093s] +Computing hash join... [0.133s] +Building seed filter... [0.011s] +Searching alignments... [0.338s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.323s] +Building query seed array... [0.089s] +Computing hash join... [0.134s] +Building seed filter... [0.009s] +Searching alignments... [0.344s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.321s] +Building query seed array... [0.084s] +Computing hash join... [0.13s] +Building seed filter... [0.008s] +Searching alignments... [0.317s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.36s] +Building query seed array... [0.112s] +Computing hash join... [0.139s] +Building seed filter... [0.008s] +Searching alignments... [0.297s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.382s] +Building query seed array... [0.09s] +Computing hash join... [0.133s] +Building seed filter... [0.008s] +Searching alignments... [0.324s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.085s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.281s] +Deallocating buffers... [0.092s] +Computing alignments... [6.871s] +Deallocating reference... [0.038s] +Loading reference sequences... [0s] +Deallocating buffers... [0.011s] +Deallocating queries... [0.02s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 19.663s +Reported 426635 pairwise alignments, 430735 HSPs. +22250 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Bv01_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.369s] +Masking queries... [0.107s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.046s] +Allocating buffers... [0s] +Loading reference sequences... [1.619s] +Masking reference... [1.097s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.461s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.353s] +Building query seed array... [0.043s] +Computing hash join... [0.116s] +Building seed filter... [0.006s] +Searching alignments... [0.109s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.357s] +Building query seed array... [0.034s] +Computing hash join... [0.087s] +Building seed filter... [0.006s] +Searching alignments... [0.119s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.396s] +Building query seed array... [0.04s] +Computing hash join... [0.076s] +Building seed filter... [0.005s] +Searching alignments... [0.104s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.033s] +Computing hash join... [0.077s] +Building seed filter... [0.005s] +Searching alignments... [0.107s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.317s] +Building query seed array... [0.04s] +Computing hash join... [0.078s] +Building seed filter... [0.006s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.357s] +Building query seed array... [0.035s] +Computing hash join... [0.077s] +Building seed filter... [0.005s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.371s] +Building query seed array... [0.037s] +Computing hash join... [0.077s] +Building seed filter... [0.006s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.315s] +Building query seed array... [0.038s] +Computing hash join... [0.075s] +Building seed filter... [0.005s] +Searching alignments... [0.089s] +Deallocating buffers... [0.091s] +Computing alignments... [2.255s] +Deallocating reference... [0.027s] +Loading reference sequences... [0s] +Deallocating buffers... [0.003s] +Deallocating queries... [0.007s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 10.778s +Reported 119039 pairwise alignments, 121169 HSPs. +5832 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Slin_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.775s] +Masking queries... [0.471s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.174s] +Allocating buffers... [0s] +Loading reference sequences... [1.598s] +Masking reference... [1.039s] +Initializing temporary storage... [0.012s] +Building reference histograms... [0.468s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.351s] +Building query seed array... [0.136s] +Computing hash join... [0.16s] +Building seed filter... [0.012s] +Searching alignments... [0.495s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.372s] +Building query seed array... [0.129s] +Computing hash join... [0.145s] +Building seed filter... [0.01s] +Searching alignments... [0.481s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.378s] +Building query seed array... [0.129s] +Computing hash join... [0.144s] +Building seed filter... [0.012s] +Searching alignments... [0.451s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.315s] +Building query seed array... [0.119s] +Computing hash join... [0.144s] +Building seed filter... [0.013s] +Searching alignments... [0.441s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.321s] +Building query seed array... [0.114s] +Computing hash join... [0.145s] +Building seed filter... [0.012s] +Searching alignments... [0.386s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.136s] +Computing hash join... [0.151s] +Building seed filter... [0.013s] +Searching alignments... [0.388s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.379s] +Building query seed array... [0.139s] +Computing hash join... [0.144s] +Building seed filter... [0.014s] +Searching alignments... [0.389s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.126s] +Computing hash join... [0.149s] +Building seed filter... [0.012s] +Searching alignments... [0.364s] +Deallocating buffers... [0.095s] +Computing alignments... [11.681s] +Deallocating reference... [0.036s] +Loading reference sequences... [0s] +Deallocating buffers... [0.021s] +Deallocating queries... [0.028s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 25.969s +Reported 640067 pairwise alignments, 641506 HSPs. +32752 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Gl03_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.801s] +Masking queries... [0.495s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.18s] +Allocating buffers... [0s] +Loading reference sequences... [1.586s] +Masking reference... [1.057s] +Initializing temporary storage... [0.013s] +Building reference histograms... [0.47s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.345s] +Building query seed array... [0.135s] +Computing hash join... [0.166s] +Building seed filter... [0.01s] +Searching alignments... [0.68s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.371s] +Building query seed array... [0.136s] +Computing hash join... [0.14s] +Building seed filter... [0.012s] +Searching alignments... [0.5s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.383s] +Building query seed array... [0.142s] +Computing hash join... [0.139s] +Building seed filter... [0.01s] +Searching alignments... [0.596s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.317s] +Building query seed array... [0.127s] +Computing hash join... [0.144s] +Building seed filter... [0.01s] +Searching alignments... [0.585s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.315s] +Building query seed array... [0.118s] +Computing hash join... [0.139s] +Building seed filter... [0.01s] +Searching alignments... [0.566s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.363s] +Building query seed array... [0.133s] +Computing hash join... [0.139s] +Building seed filter... [0.01s] +Searching alignments... [0.462s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.379s] +Building query seed array... [0.133s] +Computing hash join... [0.143s] +Building seed filter... [0.011s] +Searching alignments... [0.587s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.316s] +Building query seed array... [0.116s] +Computing hash join... [0.138s] +Building seed filter... [0.011s] +Searching alignments... [0.379s] +Deallocating buffers... [0.095s] +Computing alignments... [10.458s] +Deallocating reference... [0.034s] +Loading reference sequences... [0s] +Deallocating buffers... [0.021s] +Deallocating queries... [0.018s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 25.722s +Reported 592579 pairwise alignments, 596988 HSPs. +30042 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn09_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [1.276s] +Masking queries... [0.366s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.132s] +Allocating buffers... [0s] +Loading reference sequences... [1.59s] +Masking reference... [1.132s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.465s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.352s] +Building query seed array... [0.11s] +Computing hash join... [0.147s] +Building seed filter... [0.008s] +Searching alignments... [0.452s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.362s] +Building query seed array... [0.108s] +Computing hash join... [0.131s] +Building seed filter... [0.008s] +Searching alignments... [0.364s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.375s] +Building query seed array... [0.102s] +Computing hash join... [0.13s] +Building seed filter... [0.008s] +Searching alignments... [0.328s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.319s] +Building query seed array... [0.103s] +Computing hash join... [0.132s] +Building seed filter... [0.008s] +Searching alignments... [0.4s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.327s] +Building query seed array... [0.092s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.283s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.368s] +Building query seed array... [0.102s] +Computing hash join... [0.125s] +Building seed filter... [0.009s] +Searching alignments... [0.303s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.377s] +Building query seed array... [0.112s] +Computing hash join... [0.129s] +Building seed filter... [0.009s] +Searching alignments... [0.33s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.318s] +Building query seed array... [0.092s] +Computing hash join... [0.127s] +Building seed filter... [0.009s] +Searching alignments... [0.276s] +Deallocating buffers... [0.093s] +Computing alignments... [7.313s] +Deallocating reference... [0.039s] +Loading reference sequences... [0s] +Deallocating buffers... [0.016s] +Deallocating queries... [0.019s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 19.997s +Reported 381006 pairwise alignments, 382593 HSPs. +19632 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Nn13_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/DiamondOG +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = ../Databases/db_OG/Hook-6.5.dmnd +Sequences = 1512043 +Letters = 714797884 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.152s] +Masking queries... [0.062s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.032s] +Allocating buffers... [0s] +Loading reference sequences... [1.616s] +Masking reference... [1.285s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.457s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.351s] +Building query seed array... [0.028s] +Computing hash join... [0.069s] +Building seed filter... [0.004s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.357s] +Building query seed array... [0.024s] +Computing hash join... [0.058s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.37s] +Building query seed array... [0.022s] +Computing hash join... [0.054s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.32s] +Building query seed array... [0.029s] +Computing hash join... [0.056s] +Building seed filter... [0.004s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.313s] +Building query seed array... [0.027s] +Computing hash join... [0.054s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.358s] +Building query seed array... [0.026s] +Computing hash join... [0.056s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.374s] +Building query seed array... [0.023s] +Computing hash join... [0.05s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.314s] +Building query seed array... [0.025s] +Computing hash join... [0.052s] +Building seed filter... [0.004s] +Searching alignments... [0.029s] +Deallocating buffers... [0.092s] +Computing alignments... [0.366s] +Deallocating reference... [0.03s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0.001s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 7.863s +Reported 22784 pairwise alignments, 23612 HSPs. +1572 queries aligned. 
+ +Starting to "BLAST" against OG databases + + +"BLAST"-ing against OG database using DIAMOND: Hook-6.5.dmnd + + + + +Processing OG-database results to keep only the BEST match for each transcript + + +Updating Fasta File Sequence Names with their BEST OG hits + + + +Look for Sr_rh_Atps_WTA_EPUWTA_EPU.fasta in the ../beegfs/ Folder + + +Next Script is: 4_InFrameStopFreq.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.024s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... 
[0.049s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.009s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.012s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.046s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.052s] +Deallocating buffers... [0.01s] +Computing alignments... [0.462s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.89s +Reported 14977 pairwise alignments, 14985 HSPs. +14977 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.026s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.103s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.071s] +Building query seed array... [0.019s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.068s] +Building query seed array... [0.024s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.024s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.058s] +Building query seed array... [0.021s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... 
[0.053s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.021s] +Computing hash join... [0.008s] +Building seed filter... [0.005s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.052s] +Deallocating buffers... [0.011s] +Computing alignments... [0.475s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.101s +Reported 14970 pairwise alignments, 14978 HSPs. +14970 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02 +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.023s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.066s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.066s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... 
[0.061s] +Building query seed array... [0.019s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.056s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.009s] +Computing alignments... [0.471s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.991s +Reported 14951 pairwise alignments, 14957 HSPs. +14951 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn02_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.015s] +Masking queries... [0.022s] +Building query seed set... [0.026s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.153s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... 
[0.003s] +Searching alignments... [0.007s] +Deallocating buffers... [0.008s] +Computing alignments... [0.183s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.369s +Reported 5809 pairwise alignments, 5814 HSPs. +5809 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.015s] +Masking queries... [0.017s] +Building query seed set... [0.019s] +Algorithm: Double-indexed +Building query histograms... [0.008s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.103s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.065s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.066s] +Building query seed array... [0.015s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.059s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.01s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Deallocating buffers... [0.008s] +Computing alignments... [0.183s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.378s +Reported 5801 pairwise alignments, 5806 HSPs. +5801 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.015s] +Masking queries... [0.02s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.108s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.018s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... 
[0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.01s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Deallocating buffers... [0.01s] +Computing alignments... [0.184s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.354s +Reported 5802 pairwise alignments, 5807 HSPs. +5802 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Gspa_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.026s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.108s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.067s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.024s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.02s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.025s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.452s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.912s +Reported 16011 pairwise alignments, 16017 HSPs. +16011 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.067s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.009s] +Computing hash join... [0.018s] +Building seed filter... [0.004s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.04s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.041s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.046s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.452s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.802s +Reported 15989 pairwise alignments, 15994 HSPs. +15989 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.023s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.069s] +Building query seed array... [0.018s] +Computing hash join... [0.018s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.005s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.007s] +Computing alignments... [0.451s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.823s +Reported 16009 pairwise alignments, 16014 HSPs. +16009 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn06_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.031s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.021s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.121s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.069s] +Building query seed array... [0.028s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.022s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.066s] +Building query seed array... [0.023s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... 
[0.004s] +Searching alignments... [0.021s] +Deallocating buffers... [0.007s] +Computing alignments... [0.473s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.944s +Reported 19508 pairwise alignments, 19511 HSPs. +19508 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.025s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.02s] +Computing hash join... [0.018s] +Building seed filter... [0.003s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.024s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.077s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.024s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.064s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.025s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Deallocating buffers... [0.007s] +Computing alignments... [0.481s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.992s +Reported 19456 pairwise alignments, 19460 HSPs. +19456 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.026s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.048s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.007s] +Computing alignments... [0.471s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.796s +Reported 19448 pairwise alignments, 19452 HSPs. +19448 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv02_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.022s] +Masking queries... [0.024s] +Building query seed set... [0.025s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.145s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.052s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.062s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... 
[0.004s] +Searching alignments... [0.012s] +Deallocating buffers... [0.008s] +Computing alignments... [0.29s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.596s +Reported 9114 pairwise alignments, 9130 HSPs. +9114 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.025s] +Masking queries... [0.025s] +Building query seed set... [0.025s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.06s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.068s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.008s] +Computing alignments... [0.288s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.586s +Reported 9086 pairwise alignments, 9105 HSPs. +9086 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.025s] +Masking queries... [0.024s] +Building query seed set... [0.026s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.111s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.018s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.009s] +Computing alignments... [0.282s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.536s +Reported 9098 pairwise alignments, 9119 HSPs. +9098 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Hind_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.022s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.145s] +Masking reference... [0.107s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.056s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.016s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.065s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.02s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.052s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... 
[0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.339s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.721s +Reported 13395 pairwise alignments, 13400 HSPs. +13395 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.024s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.009s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.047s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.011s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.037s] +Building query seed array... [0.009s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.038s] +Building query seed array... [0.009s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.35s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.588s +Reported 13387 pairwise alignments, 13390 HSPs. +13387 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.025s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.005s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.021s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.356s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.724s +Reported 13378 pairwise alignments, 13382 HSPs. +13378 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn11_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.025s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.105s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.058s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.014s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.024s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.019s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.007s] +Computing alignments... [0.448s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.832s +Reported 18677 pairwise alignments, 18682 HSPs. +18677 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.025s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.145s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.022s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.026s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.022s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.463s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.88s +Reported 18645 pairwise alignments, 18651 HSPs. +18645 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.026s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.021s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.066s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.021s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.023s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.064s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.457s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.926s +Reported 18616 pairwise alignments, 18622 HSPs. +18616 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv03_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.017s] +Masking queries... [0.019s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.008s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.005s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.065s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... 
[0.003s] +Searching alignments... [0.011s] +Deallocating buffers... [0.008s] +Computing alignments... [0.248s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.483s +Reported 7264 pairwise alignments, 7269 HSPs. +7264 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.018s] +Masking queries... [0.022s] +Building query seed set... [0.024s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.145s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.013s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... 
[0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Deallocating buffers... [0.009s] +Computing alignments... [0.25s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.505s +Reported 7257 pairwise alignments, 7263 HSPs. +7257 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.017s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.052s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Deallocating buffers... [0.008s] +Computing alignments... [0.244s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.462s +Reported 7266 pairwise alignments, 7272 HSPs. +7266 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Gsp1_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.021s] +Building query seed set... [0.019s] +Algorithm: Double-indexed +Building query histograms... [0.008s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.064s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... 
[0.003s] +Searching alignments... [0.015s] +Deallocating buffers... [0.007s] +Computing alignments... [0.256s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.523s +Reported 10678 pairwise alignments, 10680 HSPs. +10678 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.024s] +Masking queries... [0.024s] +Building query seed set... [0.023s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.006s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.016s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... 
[0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.007s] +Computing alignments... [0.257s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.54s +Reported 10650 pairwise alignments, 10652 HSPs. +10650 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.02s] +Masking queries... [0.02s] +Building query seed set... [0.028s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.053s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Deallocating buffers... [0.007s] +Computing alignments... [0.265s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.55s +Reported 10635 pairwise alignments, 10637 HSPs. +10635 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv06_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.001s] +Masking queries... [0.015s] +Building query seed set... [0.003s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.119s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... [0.006s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.006s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.011s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.009s] +Computing hash join... [0.01s] +Building seed filter... [0.007s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.008s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.008s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.003s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.009s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.008s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.003s] +Deallocating buffers... [0.007s] +Computing alignments... [0.054s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.1s +Reported 1110 pairwise alignments, 1110 HSPs. +1110 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.001s] +Masking queries... [0.014s] +Building query seed set... [0.002s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.155s] +Masking reference... [0.133s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.067s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.006s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.005s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.003s] +Deallocating buffers... [0.007s] +Computing alignments... [0.053s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.135s +Reported 1110 pairwise alignments, 1110 HSPs. +1110 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.003s] +Masking queries... [0.013s] +Building query seed set... [0.003s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.109s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.006s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.062s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.007s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.055s] +Building query seed array... [0.009s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.003s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.011s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.009s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.01s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.003s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.009s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.003s] +Deallocating buffers... [0.008s] +Computing alignments... [0.056s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.139s +Reported 1110 pairwise alignments, 1110 HSPs. +1110 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Gl01_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.018s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.008s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.112s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.066s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.013s] +Computing hash join... [0.015s] +Building seed filter... [0.006s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.055s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... 
[0.003s] +Searching alignments... [0.014s] +Deallocating buffers... [0.007s] +Computing alignments... [0.232s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.506s +Reported 8949 pairwise alignments, 8950 HSPs. +8949 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.022s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.008s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.068s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.009s] +Computing alignments... [0.242s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.483s +Reported 8938 pairwise alignments, 8939 HSPs. +8938 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.017s] +Building query seed set... [0.02s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.066s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.064s] +Building query seed array... [0.021s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.005s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.007s] +Computing alignments... [0.238s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.497s +Reported 8940 pairwise alignments, 8941 HSPs. +8940 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Esca_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.021s] +Masking queries... [0.025s] +Building query seed set... [0.024s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.012s] +Deallocating buffers... [0.007s] +Computing alignments... [0.277s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.541s +Reported 8489 pairwise alignments, 8494 HSPs. +8489 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.03s] +Masking queries... [0.024s] +Building query seed set... [0.025s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.112s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.014s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.016s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.012s] +Computing alignments... [0.28s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.575s +Reported 8457 pairwise alignments, 8463 HSPs. +8457 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.021s] +Building query seed set... [0.025s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.111s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.015s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Deallocating buffers... [0.008s] +Computing alignments... [0.271s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.522s +Reported 8493 pairwise alignments, 8499 HSPs. +8493 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Calb_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.021s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.155s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.065s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.005s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.019s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.009s] +Deallocating buffers... [0.008s] +Computing alignments... [0.243s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.499s +Reported 7656 pairwise alignments, 7658 HSPs. +7656 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.025s] +Masking queries... [0.021s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.009s] +Computing alignments... [0.245s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.459s +Reported 7627 pairwise alignments, 7629 HSPs. +7627 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.024s] +Masking queries... [0.02s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.112s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.054s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Deallocating buffers... [0.008s] +Computing alignments... [0.243s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.525s +Reported 7662 pairwise alignments, 7664 HSPs. +7662 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Emac_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.032s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.015s] +Computing hash join... [0.019s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.019s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.02s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... 
[0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.007s] +Computing alignments... [0.373s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.759s +Reported 13827 pairwise alignments, 13830 HSPs. +13827 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.027s] +Masking queries... [0.024s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.102s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.067s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.023s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.02s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Deallocating buffers... [0.007s] +Computing alignments... [0.383s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.757s +Reported 13833 pairwise alignments, 13835 HSPs. +13833 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.027s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.114s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.074s] +Building query seed array... [0.023s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.021s] +Computing hash join... [0.014s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.059s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.024s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.058s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.378s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.822s +Reported 13823 pairwise alignments, 13825 HSPs. +13823 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn05_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.028s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.017s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.023s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.023s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... 
[0.004s] +Searching alignments... [0.022s] +Deallocating buffers... [0.008s] +Computing alignments... [0.54s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.997s +Reported 16657 pairwise alignments, 16661 HSPs. +16657 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.037s] +Building query seed set... [0.044s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.106s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.02s] +Computing hash join... [0.012s] +Building seed filter... [0.007s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... 
[0.043s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.014s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Deallocating buffers... [0.007s] +Computing alignments... [0.536s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.985s +Reported 16447 pairwise alignments, 16451 HSPs. +16447 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.025s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.019s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.056s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.025s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.027s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.062s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.023s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.023s] +Computing hash join... [0.008s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Deallocating buffers... [0.009s] +Computing alignments... [0.534s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.049s +Reported 16439 pairwise alignments, 16443 HSPs. +16439 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn10_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.026s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.017s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.007s] +Computing alignments... [0.409s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.746s +Reported 14708 pairwise alignments, 14710 HSPs. +14708 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.025s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.016s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.02s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.422s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.804s +Reported 14686 pairwise alignments, 14687 HSPs. +14686 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.03s] +Masking queries... [0.032s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.067s] +Building query seed array... [0.022s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.024s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Deallocating buffers... [0.007s] +Computing alignments... [0.419s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.822s +Reported 14707 pairwise alignments, 14709 HSPs. +14707 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn03_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.103s] +Building query seed set... [0.027s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.112s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.02s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.013s] +Computing hash join... [0.006s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.023s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.02s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.019s] +Computing hash join... [0.007s] +Building seed filter... 
[0.004s] +Searching alignments... [0.014s] +Deallocating buffers... [0.007s] +Computing alignments... [0.406s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.813s +Reported 12458 pairwise alignments, 12469 HSPs. +12458 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.027s] +Building query seed set... [0.027s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.067s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... 
[0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.017s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.016s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.019s] +Computing hash join... [0.008s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.022s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.412s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.745s +Reported 12438 pairwise alignments, 12449 HSPs. +12438 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.026s] +Building query seed set... [0.028s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.108s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.02s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.057s] +Building query seed array... [0.02s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... 
[0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.016s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.018s] +Computing hash join... [0.006s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.008s] +Computing alignments... [0.4s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.78s +Reported 12459 pairwise alignments, 12473 HSPs. +12459 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Halb_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.023s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.014s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.067s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.043s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.064s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.022s] +Deallocating buffers... [0.008s] +Computing alignments... [0.376s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.834s +Reported 13085 pairwise alignments, 13089 HSPs. +13085 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.022s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.045s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.062s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.057s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.133s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.068s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.067s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Deallocating buffers... [0.008s] +Computing alignments... [0.378s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.875s +Reported 13082 pairwise alignments, 13085 HSPs. +13082 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.022s] +Masking queries... [0.024s] +Building query seed set... [0.023s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.011s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.119s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Deallocating buffers... [0.007s] +Computing alignments... [0.379s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.802s +Reported 13075 pairwise alignments, 13079 HSPs. +13075 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn08_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.032s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.056s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.062s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.013s] +Computing hash join... [0.006s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.011s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.012s] +Computing hash join... [0.006s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.012s] +Computing hash join... [0.006s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.018s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.011s] +Computing hash join... [0.007s] +Building seed filter... 
[0.003s] +Searching alignments... [0.028s] +Deallocating buffers... [0.009s] +Computing alignments... [0.496s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.894s +Reported 16992 pairwise alignments, 17022 HSPs. +16992 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.025s] +Building query seed set... [0.027s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... 
[0.054s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.013s] +Computing hash join... [0.008s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.01s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.011s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Deallocating buffers... [0.008s] +Computing alignments... [0.498s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.821s +Reported 16995 pairwise alignments, 17026 HSPs. +16995 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.027s] +Building query seed set... [0.028s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.053s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.059s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.011s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... 
[0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.013s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.011s] +Computing hash join... [0.007s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.012s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Deallocating buffers... [0.008s] +Computing alignments... [0.497s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.825s +Reported 17008 pairwise alignments, 17039 HSPs. +17008 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Tx01_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.05s] +Masking queries... [0.034s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.122s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.013s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.04s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.042s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.041s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.025s] +Deallocating buffers... [0.007s] +Computing alignments... [0.514s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.903s +Reported 19505 pairwise alignments, 19510 HSPs. +19505 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.043s] +Masking queries... [0.038s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.144s] +Masking reference... [0.114s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.021s] +Computing hash join... [0.016s] +Building seed filter... [0.004s] +Searching alignments... [0.064s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.062s] +Building query seed array... [0.022s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.074s] +Building query seed array... [0.026s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.066s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.023s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.061s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Deallocating buffers... [0.007s] +Computing alignments... [0.532s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.098s +Reported 19471 pairwise alignments, 19474 HSPs. +19471 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.04s] +Masking queries... [0.026s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.052s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.061s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.021s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Deallocating buffers... [0.007s] +Computing alignments... [0.52s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.964s +Reported 19449 pairwise alignments, 19453 HSPs. +19449 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn07_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.026s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.01s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.062s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.045s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.04s] +Building query seed array... [0.009s] +Computing hash join... [0.01s] +Building seed filter... 
[0.004s] +Searching alignments... [0.025s] +Deallocating buffers... [0.009s] +Computing alignments... [0.515s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.892s +Reported 18653 pairwise alignments, 18661 HSPs. +18653 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.044s] +Masking queries... [0.03s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.115s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.053s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.005s] +Searching alignments... 
[0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Deallocating buffers... [0.009s] +Computing alignments... [0.527s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.96s +Reported 18645 pairwise alignments, 18655 HSPs. +18645 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.028s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.006s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.043s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.046s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.005s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.012s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.038s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Deallocating buffers... [0.008s] +Computing alignments... [0.525s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.873s +Reported 18646 pairwise alignments, 18653 HSPs. +18646 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn01_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.025s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.105s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.052s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.027s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.058s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.449s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.88s +Reported 17205 pairwise alignments, 17208 HSPs. +17205 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.034s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.103s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.016s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.064s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Deallocating buffers... [0.008s] +Computing alignments... [0.454s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.876s +Reported 17102 pairwise alignments, 17107 HSPs. +17102 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.042s] +Masking queries... [0.026s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.153s] +Masking reference... [0.106s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.009s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Deallocating buffers... [0.01s] +Computing alignments... [0.46s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.785s +Reported 17052 pairwise alignments, 17057 HSPs. +17052 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv04_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.028s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.106s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.051s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.037s] +Building query seed array... [0.008s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.037s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... 
[0.004s] +Searching alignments... [0.028s] +Deallocating buffers... [0.01s] +Computing alignments... [0.422s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.742s +Reported 23370 pairwise alignments, 23380 HSPs. +23370 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.048s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.009s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.044s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.008s] +Building seed filter... [0.005s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.048s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.032s] +Deallocating buffers... [0.008s] +Computing alignments... [0.425s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.818s +Reported 23332 pairwise alignments, 23344 HSPs. +23332 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.037s] +Masking queries... [0.03s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.111s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.046s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Deallocating buffers... [0.008s] +Computing alignments... [0.423s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.842s +Reported 23363 pairwise alignments, 23372 HSPs. +23363 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Usac_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.05s] +Masking queries... [0.032s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.056s] +Building query seed array... [0.013s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.066s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.064s] +Building query seed array... [0.02s] +Computing hash join... [0.013s] +Building seed filter... [0.005s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.014s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.041s] +Building query seed array... [0.015s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.04s] +Building query seed array... [0.014s] +Computing hash join... [0.009s] +Building seed filter... 
[0.003s] +Searching alignments... [0.032s] +Deallocating buffers... [0.007s] +Computing alignments... [0.748s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.291s +Reported 30410 pairwise alignments, 30414 HSPs. +30410 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.052s] +Masking queries... [0.032s] +Building query seed set... [0.042s] +Algorithm: Double-indexed +Building query histograms... [0.022s] +Allocating buffers... [0s] +Loading reference sequences... [0.146s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.056s] +Building query seed array... [0.015s] +Computing hash join... [0.019s] +Building seed filter... [0.006s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.058s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.053s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.013s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.04s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Deallocating buffers... [0.011s] +Computing alignments... [0.75s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.273s +Reported 30044 pairwise alignments, 30046 HSPs. +30044 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.048s] +Masking queries... [0.033s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.056s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.072s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.055s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.052s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.009s] +Building seed filter... [0.004s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Deallocating buffers... [0.007s] +Computing alignments... [0.744s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.247s +Reported 30358 pairwise alignments, 30361 HSPs. +30358 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn04_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.015s] +Masking queries... [0.021s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.153s] +Masking reference... [0.109s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.061s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.006s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.05s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.008s] +Deallocating buffers... [0.008s] +Computing alignments... [0.232s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.437s +Reported 6507 pairwise alignments, 6514 HSPs. +6507 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.017s] +Masking queries... [0.021s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.113s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... 
[0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Deallocating buffers... [0.008s] +Computing alignments... [0.241s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.437s +Reported 6501 pairwise alignments, 6504 HSPs. +6501 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.018s] +Masking queries... [0.02s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.145s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.023s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Deallocating buffers... [0.007s] +Computing alignments... [0.238s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.418s +Reported 6510 pairwise alignments, 6515 HSPs. +6510 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Emar_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.027s] +Masking queries... [0.025s] +Building query seed set... [0.03s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.015s] +Computing hash join... [0.015s] +Building seed filter... [0.004s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.022s] +Deallocating buffers... [0.008s] +Computing alignments... [0.367s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.744s +Reported 19529 pairwise alignments, 19530 HSPs. +19529 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.022s] +Building query seed set... [0.029s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.106s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.013s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.023s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.06s] +Building query seed array... [0.019s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Deallocating buffers... [0.007s] +Computing alignments... [0.375s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.776s +Reported 19648 pairwise alignments, 19649 HSPs. +19648 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.025s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.015s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.007s] +Computing alignments... [0.374s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.739s +Reported 19425 pairwise alignments, 19428 HSPs. +19425 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Rsp1_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.025s] +Masking queries... [0.023s] +Building query seed set... [0.026s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.159s] +Masking reference... [0.103s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.061s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.009s] +Computing alignments... [0.284s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.579s +Reported 11921 pairwise alignments, 11926 HSPs. +11921 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.019s] +Building query seed set... [0.026s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.162s] +Masking reference... [0.108s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.015s] +Computing hash join... [0.021s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.021s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Deallocating buffers... [0.01s] +Computing alignments... [0.284s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.578s +Reported 11920 pairwise alignments, 11925 HSPs. +11920 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.023s] +Masking queries... [0.023s] +Building query seed set... [0.026s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.16s] +Masking reference... [0.114s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.07s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Deallocating buffers... [0.008s] +Computing alignments... [0.285s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.562s +Reported 11928 pairwise alignments, 11933 HSPs. +11928 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Sspa_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.022s] +Masking queries... [0.023s] +Building query seed set... [0.024s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.155s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.055s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.064s] +Building query seed array... [0.017s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.056s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... 
[0.003s] +Searching alignments... [0.012s] +Deallocating buffers... [0.008s] +Computing alignments... [0.265s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.554s +Reported 8536 pairwise alignments, 8540 HSPs. +8536 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.023s] +Masking queries... [0.024s] +Building query seed set... [0.027s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.171s] +Masking reference... [0.112s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.045s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.015s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.019s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.009s] +Building seed filter... [0.006s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.074s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Deallocating buffers... [0.007s] +Computing alignments... [0.266s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.547s +Reported 8521 pairwise alignments, 8526 HSPs. +8521 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0.001s] +Opening the output file... [0s] +Loading query sequences... [0.024s] +Masking queries... [0.023s] +Building query seed set... [0.025s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.154s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Deallocating buffers... [0.009s] +Computing alignments... [0.276s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.501s +Reported 8540 pairwise alignments, 8546 HSPs. +8540 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Hhir_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.031s] +Masking queries... [0.024s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.16s] +Masking reference... [0.11s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.068s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.02s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.064s] +Building query seed array... [0.024s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.054s] +Building query seed array... [0.023s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.065s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... 
[0.003s] +Searching alignments... [0.017s] +Deallocating buffers... [0.008s] +Computing alignments... [0.389s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.831s +Reported 14131 pairwise alignments, 14135 HSPs. +14131 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0.001s] +Opening the output file... [0s] +Loading query sequences... [0.038s] +Masking queries... [0.034s] +Building query seed set... [0.038s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.176s] +Masking reference... [0.103s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.02s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.008s] +Computing alignments... [0.39s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.849s +Reported 14101 pairwise alignments, 14103 HSPs. +14101 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.023s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.176s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.022s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.063s] +Building query seed array... [0.022s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.024s] +Computing hash join... [0.012s] +Building seed filter... [0.005s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Deallocating buffers... [0.008s] +Computing alignments... [0.399s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.842s +Reported 14108 pairwise alignments, 14110 HSPs. +14108 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn12_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.029s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.165s] +Masking reference... [0.114s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.053s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.018s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.066s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.058s] +Building query seed array... [0.022s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... 
[0.003s] +Searching alignments... [0.015s] +Deallocating buffers... [0.007s] +Computing alignments... [0.35s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.759s +Reported 13993 pairwise alignments, 13999 HSPs. +13993 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.026s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.162s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.053s] +Building query seed array... [0.015s] +Computing hash join... [0.018s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.063s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.007s] +Computing alignments... [0.371s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.711s +Reported 13983 pairwise alignments, 13987 HSPs. +13983 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.023s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.156s] +Masking reference... [0.106s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.019s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.019s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Deallocating buffers... [0.007s] +Computing alignments... [0.367s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.693s +Reported 13981 pairwise alignments, 13986 HSPs. +13981 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn14_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.053s] +Masking queries... [0.038s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.059s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.013s] +Computing hash join... [0.019s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.015s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.04s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.036s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.024s] +Deallocating buffers... [0.008s] +Computing alignments... [0.647s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.04s +Reported 25265 pairwise alignments, 25285 HSPs. +25265 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.059s] +Masking queries... [0.031s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.026s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.056s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.018s] +Computing hash join... [0.016s] +Building seed filter... [0.004s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... 
[0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.048s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.043s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.044s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Deallocating buffers... [0.007s] +Computing alignments... [0.666s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.084s +Reported 25226 pairwise alignments, 25252 HSPs. +25226 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.057s] +Masking queries... [0.031s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.024s] +Allocating buffers... [0s] +Loading reference sequences... [0.15s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.021s] +Computing hash join... [0.02s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.044s] +Building query seed array... [0.014s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.016s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Deallocating buffers... [0.007s] +Computing alignments... [0.668s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.088s +Reported 25233 pairwise alignments, 25260 HSPs. +25233 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Gl02_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.034s] +Masking queries... [0.027s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.022s] +Allocating buffers... [0s] +Loading reference sequences... [0.158s] +Masking reference... [0.11s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.053s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.06s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.021s] +Deallocating buffers... [0.006s] +Computing alignments... [0.45s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.76s +Reported 17375 pairwise alignments, 17379 HSPs. +17375 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.037s] +Masking queries... [0.024s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.059s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.011s] +Computing hash join... [0.017s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... 
[0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.042s] +Building query seed array... [0.009s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.042s] +Building query seed array... [0.011s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.01s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Deallocating buffers... [0.008s] +Computing alignments... [0.465s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.773s +Reported 17358 pairwise alignments, 17365 HSPs. +17358 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.025s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.112s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.061s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.055s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.013s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.012s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.022s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.009s] +Computing alignments... [0.47s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.846s +Reported 17328 pairwise alignments, 17333 HSPs. +17328 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv05_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.027s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.106s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.069s] +Building query seed array... [0.017s] +Computing hash join... [0.016s] +Building seed filter... [0.004s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.018s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.017s] +Computing hash join... [0.01s] +Building seed filter... [0.005s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.053s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... 
[0.003s] +Searching alignments... [0.021s] +Deallocating buffers... [0.007s] +Computing alignments... [0.389s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.774s +Reported 17843 pairwise alignments, 17846 HSPs. +17843 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.023s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.153s] +Masking reference... [0.105s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.02s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.023s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.049s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.048s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.016s] +Deallocating buffers... [0.006s] +Computing alignments... [0.394s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.719s +Reported 17817 pairwise alignments, 17820 HSPs. +17817 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.029s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.161s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.054s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.015s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.068s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.022s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.059s] +Building query seed array... [0.026s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.058s] +Building query seed array... [0.021s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.019s] +Deallocating buffers... [0.008s] +Computing alignments... [0.394s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.838s +Reported 17794 pairwise alignments, 17797 HSPs. +17794 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Bv01_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.01s] +Masking queries... [0.018s] +Building query seed set... [0.012s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.155s] +Masking reference... [0.122s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.066s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.056s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.011s] +Computing hash join... [0.013s] +Building seed filter... [0.006s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.007s] +Deallocating buffers... [0.007s] +Computing alignments... [0.177s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.345s +Reported 4888 pairwise alignments, 4891 HSPs. +4888 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.01s] +Masking queries... [0.016s] +Building query seed set... [0.011s] +Algorithm: Double-indexed +Building query histograms... [0.005s] +Allocating buffers... [0s] +Loading reference sequences... [0.163s] +Masking reference... [0.126s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.063s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.063s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.012s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.011s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.012s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.014s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.008s] +Deallocating buffers... [0.007s] +Computing alignments... [0.176s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.367s +Reported 4897 pairwise alignments, 4900 HSPs. +4897 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.018s] +Masking queries... [0.018s] +Building query seed set... [0.012s] +Algorithm: Double-indexed +Building query histograms... [0.005s] +Allocating buffers... [0s] +Loading reference sequences... [0.155s] +Masking reference... [0.114s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.055s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.057s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.056s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.01s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.012s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... 
[0.011s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.012s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.009s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.053s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.013s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Deallocating buffers... [0.009s] +Computing alignments... [0.175s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.345s +Reported 4885 pairwise alignments, 4889 HSPs. +4885 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Slin_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.046s] +Masking queries... [0.035s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.022s] +Allocating buffers... [0s] +Loading reference sequences... [0.156s] +Masking reference... [0.112s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.06s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.058s] +Building query seed array... [0.016s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.069s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.016s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.045s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.044s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... 
[0.003s] +Searching alignments... [0.029s] +Deallocating buffers... [0.011s] +Computing alignments... [0.661s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.164s +Reported 26876 pairwise alignments, 26884 HSPs. +26876 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.06s] +Masking queries... [0.035s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.021s] +Allocating buffers... [0s] +Loading reference sequences... [0.148s] +Masking reference... [0.107s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.058s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.066s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.052s] +Building query seed array... [0.014s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.046s] +Building query seed array... [0.017s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.036s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Deallocating buffers... [0.007s] +Computing alignments... [0.68s] +Deallocating reference... [0.002s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.156s +Reported 26862 pairwise alignments, 26870 HSPs. +26862 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.054s] +Masking queries... [0.031s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.157s] +Masking reference... [0.104s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.065s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.066s] +Building query seed array... [0.015s] +Computing hash join... [0.013s] +Building seed filter... [0.004s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.014s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... 
[0.047s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.046s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.044s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.042s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Deallocating buffers... [0.01s] +Computing alignments... [0.671s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.192s +Reported 26880 pairwise alignments, 26889 HSPs. +26880 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Gl03_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.04s] +Masking queries... [0.032s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.113s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.058s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.023s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.021s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.019s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.022s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.049s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.02s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.022s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.048s] +Building query seed array... [0.021s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.029s] +Deallocating buffers... [0.008s] +Computing alignments... [0.644s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.178s +Reported 24415 pairwise alignments, 24421 HSPs. +24415 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.059s] +Masking queries... [0.03s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.018s] +Allocating buffers... [0s] +Loading reference sequences... [0.156s] +Masking reference... [0.106s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.062s] +Building query seed array... [0.014s] +Computing hash join... [0.016s] +Building seed filter... [0.003s] +Searching alignments... [0.066s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.051s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.051s] +Building query seed array... [0.015s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.013s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.052s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.045s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.017s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.016s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Deallocating buffers... [0.008s] +Computing alignments... [0.654s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.161s +Reported 24409 pairwise alignments, 24417 HSPs. +24409 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.059s] +Masking queries... [0.029s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.151s] +Masking reference... [0.106s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.06s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.019s] +Computing hash join... [0.013s] +Building seed filter... [0.006s] +Searching alignments... [0.059s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.051s] +Building query seed array... [0.013s] +Computing hash join... [0.013s] +Building seed filter... [0.005s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.013s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.039s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... 
[0.05s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.043s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.047s] +Building query seed array... [0.013s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.047s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.012s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Deallocating buffers... [0.008s] +Computing alignments... [0.643s] +Deallocating reference... [0.005s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.112s +Reported 24403 pairwise alignments, 24408 HSPs. +24403 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn09_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.03s] +Masking queries... [0.029s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.147s] +Masking reference... [0.115s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.018s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.059s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.015s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.051s] +Building query seed array... [0.018s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.049s] +Building query seed array... [0.019s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.018s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.044s] +Building query seed array... [0.016s] +Computing hash join... [0.011s] +Building seed filter... 
[0.003s] +Searching alignments... [0.019s] +Deallocating buffers... [0.009s] +Computing alignments... [0.42s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.79s +Reported 15965 pairwise alignments, 15969 HSPs. +15965 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13 +Opening the database... [0.002s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.037s] +Masking queries... [0.024s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.149s] +Masking reference... [0.107s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.011s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... 
[0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.047s] +Building query seed array... [0.011s] +Computing hash join... [0.009s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.043s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.042s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.054s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.05s] +Building query seed array... [0.015s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.041s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Deallocating buffers... [0.009s] +Computing alignments... [0.428s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.723s +Reported 15949 pairwise alignments, 15952 HSPs. +15949 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13 +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.025s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.144s] +Masking reference... [0.117s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.063s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.068s] +Building query seed array... [0.012s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.058s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.058s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... 
[0.033s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.052s] +Building query seed array... [0.008s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.05s] +Building query seed array... [0.016s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.055s] +Building query seed array... [0.014s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.046s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Deallocating buffers... [0.008s] +Computing alignments... [0.414s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.797s +Reported 15941 pairwise alignments, 15945 HSPs. +15941 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Nn13_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13 Folder + + +Next Script is: 5_GCodeTranslate.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.005s] +Masking queries... [0.01s] +Building query seed set... [0.005s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.153s] +Masking reference... [0.104s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.056s] +Allocating buffers... 
[0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.065s] +Building query seed array... [0.011s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.065s] +Building query seed array... [0.009s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.013s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.001s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.006s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.009s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.003s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.061s] +Building query seed array... [0.01s] +Computing hash join... [0.011s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.013s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.014s] +Computing hash join... [0.013s] +Building seed filter... 
[0.003s] +Searching alignments... [0.002s] +Deallocating buffers... [0.007s] +Computing alignments... [0.042s] +Deallocating reference... [0.008s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.11s +Reported 706 pairwise alignments, 706 HSPs. +706 queries aligned. +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.002s] +Masking queries... [0.012s] +Building query seed set... [0.004s] +Algorithm: Double-indexed +Building query histograms... [0.003s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.108s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.057s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.061s] +Building query seed array... [0.011s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.057s] +Building query seed array... [0.01s] +Computing hash join... [0.015s] +Building seed filter... [0.003s] +Searching alignments... 
[0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.057s] +Building query seed array... [0.011s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.001s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.05s] +Building query seed array... [0.008s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.001s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.048s] +Building query seed array... [0.007s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.06s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.001s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.056s] +Building query seed array... [0.01s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.057s] +Building query seed array... [0.009s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.001s] +Deallocating buffers... [0.008s] +Computing alignments... [0.042s] +Deallocating reference... [0.004s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.098s +Reported 700 pairwise alignments, 700 HSPs. +700 queries aligned. 
+diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps +Opening the database... [0.001s] +#Target sequences to report alignments for: 1 +Reference = ../Databases/db_StopFreq/RepEukProts.dmnd +Sequences = 156593 +Letters = 61953133 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.002s] +Masking queries... [0.012s] +Building query seed set... [0.003s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.152s] +Masking reference... [0.117s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.059s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.064s] +Building query seed array... [0.011s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.055s] +Building query seed array... [0.01s] +Computing hash join... [0.014s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.054s] +Building query seed array... [0.009s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.047s] +Building query seed array... [0.011s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... 
[0.001s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.047s] +Building query seed array... [0.012s] +Computing hash join... [0.012s] +Building seed filter... [0.007s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.052s] +Building query seed array... [0.01s] +Computing hash join... [0.01s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.053s] +Building query seed array... [0.01s] +Computing hash join... [0.012s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.049s] +Building query seed array... [0.01s] +Computing hash join... [0.013s] +Building seed filter... [0.003s] +Searching alignments... [0.002s] +Deallocating buffers... [0.011s] +Computing alignments... [0.038s] +Deallocating reference... [0.003s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.088s +Reported 709 pairwise alignments, 709 HSPs. +709 queries aligned. +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + +Identifying ORFs in the Fasta file based on the output of 3_CountOGsDiamond.py + + + +Translating DNA using TAA as the sole STOP codon + + + +Translating DNA using TGA as the sole STOP codon + + + +Translating DNA using TAG as the sole STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TGA as only STOP codon + + + +Writing FASTA files with ORF and Protein sequences with TAG as only STOP codon + + + +Gathering Sequence information from FASTA and TSV files + + +Collecting in-frame stop codon information when TGA is the only STOP + + +Collecting in-frame stop codon information when TAG is the only STOP + + +Collecting in-frame stop codon information when TAA is the only STOP + + +Look for Sr_rh_Atps_WTA_EPU.Renamed_StopCodonStats.tsv in the /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps Folder + + +Next Script is: 5_GCodeTranslate.py + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn02_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn02_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn02_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn02_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn02_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn02 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + + + +Grabbing useful info from the Sr_rh_Gspa_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Gspa_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Gspa_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Gspa_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Gspa_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Gspa Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn06_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn06_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn06_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn06_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn06_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn06 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Bv02_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv02_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv02_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv02_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv02_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv02 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Hind_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Hind_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Hind_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Hind_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Hind_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Hind Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Nn11_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn11_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn11_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn11_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn11_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn11 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Bv03_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv03_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv03_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv03_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv03_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv03 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Gsp1_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Gsp1_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Gsp1_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Gsp1_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Gsp1_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Gsp1 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Bv06_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv06_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv06_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv06_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv06_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv06 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Gl01_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Gl01_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Gl01_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Gl01_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Gl01_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Gl01 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Esca_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Esca_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Esca_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Esca_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Esca_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Esca Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Calb_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Calb_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Calb_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Calb_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Calb_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Calb Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Emac_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Emac_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Emac_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Emac_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Emac_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Emac Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn05_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn05_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn05_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn05_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn05_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn05 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn10_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn10_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn10_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn10_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn10_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn10 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + + + +Grabbing useful info from the Sr_rh_Nn03_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn03_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn03_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn03_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn03_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn03 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Halb_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Halb_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Halb_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Halb_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Halb_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Halb Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + + + +Grabbing useful info from the Sr_rh_Nn08_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn08_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn08_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn08_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn08_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn08 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Tx01_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Tx01_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Tx01_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Tx01_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Tx01_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Tx01 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn07_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn07_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn07_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn07_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn07_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn07 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn01_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn01_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn01_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn01_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn01_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn01 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Bv04_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv04_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv04_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv04_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv04_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv04 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Usac_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Usac_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Usac_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Usac_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Usac_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Usac Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn04_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn04_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn04_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn04_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn04_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn04 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Emar_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Emar_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Emar_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Emar_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Emar_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Emar Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Rsp1_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Rsp1_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Rsp1_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Rsp1_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Rsp1_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Rsp1 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Sspa_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Sspa_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Sspa_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Sspa_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Sspa_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Sspa Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Hhir_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Hhir_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Hhir_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Hhir_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Hhir_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Hhir Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn12_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn12_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn12_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn12_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn12_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn12 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn14_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn14_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn14_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn14_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn14_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn14 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + + + +Grabbing useful info from the Sr_rh_Gl02_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Gl02_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Gl02_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Gl02_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Gl02_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Gl02 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Bv05_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv05_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv05_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv05_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv05_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv05 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Bv01_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Bv01_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Bv01_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Bv01_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Bv01_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Bv01 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Slin_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Slin_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Slin_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Slin_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Slin_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Slin Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + + + +Grabbing useful info from the Sr_rh_Gl03_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Gl03_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Gl03_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Gl03_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Gl03_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Gl03 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. 
+ BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn09_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn09_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn09_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn09_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn09_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn09 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Nn13_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Nn13_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Nn13_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Nn13_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Nn13_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Nn13 Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! 
+ + +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) +/gridapps/software/Biopython/1.75-foss-2019b-Python-3.7.4/lib/python3.7/site-packages/Bio/Seq.py:2748: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future. + BiopythonWarning) + + +Grabbing useful info from the Sr_rh_Atps_WTA_EPU.Renamed.fasta Fasta File +and from the Sr_rh_Atps_WTA_EPU.Renamed_allOGCleanresults.tsv OG-Assignment Spreadsheet + + +Extracting ORFs from the transcriptomic data-set + + + + +Translating ORFs from using the Universal genetic code + + +Writing FASTA file with ORF sequences using the Universal genetic code + + +Writing FASTA file with Translated ORF sequences using the Universal genetic code + + +Look for Sr_rh_Atps_WTA_EPU.Renamed_Universal_NTD.ORF.fasta, +Sr_rh_Atps_WTA_EPU.Renamed_Universal_AA.ORF.fasta, and +Sr_rh_Atps_WTA_EPU.Renamed_Universal_allOGCleanresults.tsv, +which are in the Sr_rh_Atps Folder + + +Next Script is: 6_FilterPartials.py in the FinalizeTranscripts Folder +with a copy of the outputs of this script! + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/Original/Concatenated/Sr_rh_Rsp1.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.024s] +Masking sequences... [0.024s] +Writing sequences... [0.008s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... 
[0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = bab6450592789937ae623d8d5a857e95 +Processed 23164 sequences, 4715295 letters. +Total time = 0.063s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Rsp1/Original/Concatenated/Sr_rh_Rsp1.AA.Concatenated.dmnd +Sequences = 23164 +Letters = 4715295 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.032s] +Masking queries... [0.022s] +Building query seed set... [0.03s] +Algorithm: Double-indexed +Building query histograms... [0.011s] +Allocating buffers... [0s] +Loading reference sequences... [0.014s] +Masking reference... [0.014s] +Initializing temporary storage... [0.008s] +Building reference histograms... [0.011s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.017s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.021s] +Computing hash join... 
[0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.015s] +Building query seed array... [0.018s] +Computing hash join... [0.004s] +Building seed filter... [0.006s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.052s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.02s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.02s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.023s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.049s] +Deallocating buffers... [0.001s] +Computing alignments... [0.529s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.449s +Reported 213997 pairwise alignments, 214009 HSPs. +20278 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Rsp1 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Rsp1 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Rsp1 + + +There were 23164 ORFs originally, with 402 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 23164 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1460 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3886 Unique ORFs for Sr_rh_Rsp1 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/Original/Concatenated/Sr_rh_Gspa.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.015s] +Masking sequences... [0.022s] +Writing sequences... [0.005s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = a6c8290dedcc9f8988a2173a11da18c7 +Processed 7105 sequences, 3003718 letters. +Total time = 0.048s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gspa/Original/Concatenated/Sr_rh_Gspa.AA.Concatenated.dmnd +Sequences = 7105 +Letters = 3003718 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.021s] +Masking queries... [0.022s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.011s] +Masking reference... [0.012s] +Initializing temporary storage... [0.008s] +Building reference histograms... [0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.012s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.008s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.007s] +Searching alignments... [0.019s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.01s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.016s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.014s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.007s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.008s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.016s] +Deallocating buffers... [0s] +Computing alignments... [0.163s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.688s +Reported 21465 pairwise alignments, 21479 HSPs. +4188 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Gspa at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Gspa + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Gspa + + +There were 7105 ORFs originally, with 403 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 7105 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 247 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 3885 Unique ORFs for Sr_rh_Gspa + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/Original/Concatenated/Sr_rh_Bv06.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.025s] +Masking sequences... [0.025s] +Writing sequences... [0.006s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0.001s] +Database hash = 0d69e46f38fb37d85dde5e9893f95811 +Processed 13330 sequences, 3179706 letters. +Total time = 0.064s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv06/Original/Concatenated/Sr_rh_Bv06.AA.Concatenated.dmnd +Sequences = 13330 +Letters = 3179706 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.019s] +Masking queries... [0.02s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.01s] +Masking reference... [0.013s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.01s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.015s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.01s] +Building query seed array... [0.007s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.013s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.013s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.013s] +Computing hash join... 
[0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Deallocating buffers... [0s] +Computing alignments... [0.354s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.974s +Reported 118156 pairwise alignments, 118180 HSPs. +10897 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv06 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv06 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv06 + + +There were 13330 ORFs originally, with 247 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 13330 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 708 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3447 Unique ORFs for Sr_rh_Bv06 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/Original/Concatenated/Sr_rh_Bv05.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.034s] +Masking sequences... [0.032s] +Writing sequences... [0.01s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0.001s] +Database hash = f232a8887cb0b9a70c7d1734930dc141 +Processed 21731 sequences, 6537985 letters. +Total time = 0.084s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv05/Original/Concatenated/Sr_rh_Bv05.AA.Concatenated.dmnd +Sequences = 21731 +Letters = 6537985 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.046s] +Masking queries... [0.03s] +Building query seed set... [0.037s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.02s] +Masking reference... [0.019s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.017s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.011s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.08s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.017s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.064s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.067s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.01s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.059s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.009s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.053s] +Deallocating buffers... [0s] +Computing alignments... [1.01s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.041s +Reported 247684 pairwise alignments, 247700 HSPs. +18611 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv05 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv05 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv05 + + +There were 21731 ORFs originally, with 985 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 21731 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1186 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4673 Unique ORFs for Sr_rh_Bv05 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/Original/Concatenated/Sr_rh_Hhir.AA.Concatenated.fasta +Opening the database file... [0.001s] +Loading sequences... [0.029s] +Masking sequences... [0.025s] +Writing sequences... [0.007s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 428316e31713e49d2d7dce86c739ac6d +Processed 10555 sequences, 4383683 letters. +Total time = 0.067s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hhir/Original/Concatenated/Sr_rh_Hhir.AA.Concatenated.dmnd +Sequences = 10555 +Letters = 4383683 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.021s] +Building query seed set... [0.028s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.011s] +Masking reference... [0.015s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.013s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.015s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.035s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.012s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.012s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... 
[0.029s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.013s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.019s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Deallocating buffers... [0s] +Computing alignments... [0.348s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.045s +Reported 74899 pairwise alignments, 74902 HSPs. +8134 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Hhir at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Hhir + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Hhir + + +There were 10555 ORFs originally, with 539 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 10555 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 276 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 3796 Unique ORFs for Sr_rh_Hhir + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/Original/Concatenated/Sr_rh_Usac.AA.Concatenated.fasta +Opening the database file... [0.003s] +Loading sequences... [0.04s] +Masking sequences... [0.03s] +Writing sequences... [0.01s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 8b58851e74ea699eb4d51c09aeec1dbf +Processed 28139 sequences, 6273041 letters. +Total time = 0.088s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Usac/Original/Concatenated/Sr_rh_Usac.AA.Concatenated.dmnd +Sequences = 28139 +Letters = 6273041 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.04s] +Masking queries... [0.029s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.019s] +Masking reference... [0.021s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.013s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.02s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.006s] +Searching alignments... [0.075s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.054s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.011s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.054s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.009s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.053s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.011s] +Computing hash join... 
[0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.045s] +Deallocating buffers... [0s] +Computing alignments... [0.662s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.57s +Reported 254805 pairwise alignments, 254821 HSPs. +22199 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Usac at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Usac + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Usac + + +There were 28139 ORFs originally, with 552 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 28139 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 2668 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5891 Unique ORFs for Sr_rh_Usac + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/Original/Concatenated/Sr_rh_Nn12.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.027s] +Masking sequences... [0.025s] +Writing sequences... [0.008s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = e7d4a4c31fbd8f98b96b01981ecc8606 +Processed 17423 sequences, 5488596 letters. +Total time = 0.067s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn12/Original/Concatenated/Sr_rh_Nn12.AA.Concatenated.dmnd +Sequences = 17423 +Letters = 5488596 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.023s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.018s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.012s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.054s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.021s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.02s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.044s] +Deallocating buffers... [0.001s] +Computing alignments... [0.559s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.461s +Reported 130509 pairwise alignments, 130513 HSPs. +13655 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn12 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn12 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn12 + + +There were 17423 ORFs originally, with 714 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 17423 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1428 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4918 Unique ORFs for Sr_rh_Nn12 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/Original/Concatenated/Sr_rh_Nn11.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.025s] +Masking sequences... [0.027s] +Writing sequences... [0.009s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = ce23face356f3db46ecedaa798ad5aef +Processed 16742 sequences, 5190604 letters. +Total time = 0.068s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn11/Original/Concatenated/Sr_rh_Nn11.AA.Concatenated.dmnd +Sequences = 16742 +Letters = 5190604 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.023s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.016s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.011s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.014s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.017s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.043s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.019s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... 
[0.036s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.037s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.034s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.033s] +Deallocating buffers... [0s] +Computing alignments... [0.486s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.293s +Reported 118397 pairwise alignments, 118399 HSPs. +12869 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn11 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn11 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn11 + + +There were 16742 ORFs originally, with 605 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 16742 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1489 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4851 Unique ORFs for Sr_rh_Nn11 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/Original/Concatenated/Sr_rh_Emac.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.019s] +Masking sequences... [0.021s] +Writing sequences... [0.005s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0.001s] +Database hash = 64cf2a04524c8e16fd6656e71370d508 +Processed 9237 sequences, 3633229 letters. +Total time = 0.051s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emac/Original/Concatenated/Sr_rh_Emac.AA.Concatenated.dmnd +Sequences = 9237 +Letters = 3633229 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.023s] +Masking queries... [0.021s] +Building query seed set... [0.022s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.009s] +Masking reference... [0.013s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.011s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.013s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.007s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.011s] +Building query seed array... [0.011s] +Computing hash join... [0.004s] +Building seed filter... [0.006s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.005s] +Building seed filter... [0.007s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.015s] +Computing hash join... 
[0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Deallocating buffers... [0s] +Computing alignments... [0.293s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.903s +Reported 63591 pairwise alignments, 63591 HSPs. +7137 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Emac at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Emac + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Emac + + +There were 9237 ORFs originally, with 832 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 9237 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 279 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3032 Unique ORFs for Sr_rh_Emac + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/Original/Concatenated/Sr_rh_Nn07.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.039s] +Masking sequences... [0.032s] +Writing sequences... [0.012s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0.001s] +Database hash = 8244245367c3ffdbc45bb090b15f7b05 +Processed 23977 sequences, 7324212 letters. +Total time = 0.092s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn07/Original/Concatenated/Sr_rh_Nn07.AA.Concatenated.dmnd +Sequences = 23977 +Letters = 7324212 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.05s] +Masking queries... [0.027s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.022s] +Masking reference... [0.022s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.018s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.012s] +Computing hash join... [0.005s] +Building seed filter... [0.007s] +Searching alignments... [0.214s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.005s] +Searching alignments... [0.153s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... 
[0.147s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.135s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.161s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.126s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.136s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.015s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.128s] +Deallocating buffers... [0.001s] +Computing alignments... [1.08s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.825s +Reported 245593 pairwise alignments, 245597 HSPs. +19882 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn07 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn07 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn07 + + +There were 23977 ORFs originally, with 2660 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 23977 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1830 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5510 Unique ORFs for Sr_rh_Nn07 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/Original/Concatenated/Sr_rh_Nn13.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.033s] +Masking sequences... [0.028s] +Writing sequences... [0.009s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 6284262dc2b368563c66830b2de682fe +Processed 19632 sequences, 6226238 letters. +Total time = 0.077s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn13/Original/Concatenated/Sr_rh_Nn13.AA.Concatenated.dmnd +Sequences = 19632 +Letters = 6226238 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.027s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.016s] +Masking reference... [0.019s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.013s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.014s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... [0.092s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.013s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.012s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.072s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.009s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.011s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... 
[0.066s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.01s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.059s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.009s] +Building query seed array... [0.009s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.059s] +Deallocating buffers... [0s] +Computing alignments... [0.711s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.702s +Reported 167005 pairwise alignments, 167007 HSPs. +15814 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn13 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn13 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn13 + + +There were 19632 ORFs originally, with 1170 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 19632 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1617 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 5105 Unique ORFs for Sr_rh_Nn13 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/Original/Concatenated/Sr_rh_Hind.AA.Concatenated.fasta +Opening the database file... [0.001s] +Loading sequences... [0.023s] +Masking sequences... [0.024s] +Writing sequences... [0.007s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 31071ed620ecc09074d6eef75556a9f9 +Processed 10997 sequences, 4801737 letters. +Total time = 0.059s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Hind/Original/Concatenated/Sr_rh_Hind.AA.Concatenated.dmnd +Sequences = 10997 +Letters = 4801737 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.026s] +Masking queries... [0.021s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.012s] +Masking reference... [0.016s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.048s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.016s] +Computing hash join... 
[0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.039s] +Deallocating buffers... [0s] +Computing alignments... [0.412s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.221s +Reported 85965 pairwise alignments, 86115 HSPs. +9226 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Hind at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Hind + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Hind + + +There were 10997 ORFs originally, with 1346 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 10997 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 307 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 2768 Unique ORFs for Sr_rh_Hind + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/Original/Concatenated/Sr_rh_Slin.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.016s] +Masking sequences... [0.016s] +Writing sequences... [0.003s] +Hashing sequences... [0s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = eb21f94236c55caa57cea789977257e5 +Processed 5832 sequences, 1943459 letters. +Total time = 0.04s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Slin/Original/Concatenated/Sr_rh_Slin.AA.Concatenated.dmnd +Sequences = 5832 +Letters = 1943459 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.017s] +Masking queries... [0.015s] +Building query seed set... [0.012s] +Algorithm: Double-indexed +Building query histograms... [0.005s] +Allocating buffers... [0s] +Loading reference sequences... [0.007s] +Masking reference... [0.009s] +Initializing temporary storage... [0.008s] +Building reference histograms... [0.004s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.01s] +Building query seed array... [0.011s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.009s] +Building query seed array... [0.011s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... 
[0.019s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.012s] +Building query seed array... [0.008s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.017s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.008s] +Building query seed array... [0.008s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.018s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.009s] +Building query seed array... [0.009s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.015s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.009s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.015s] +Deallocating buffers... [0s] +Computing alignments... [0.144s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.621s +Reported 32543 pairwise alignments, 32545 HSPs. +3816 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Slin at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Slin + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Slin + + +There were 5832 ORFs originally, with 441 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 5832 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 279 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 2566 Unique ORFs for Sr_rh_Slin + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/Original/Concatenated/Sr_rh_Atps.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.002s] +Masking sequences... [0.011s] +Writing sequences... [0s] +Hashing sequences... [0s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 032e18933d35f2dcdbd89f8365a0e29b +Processed 1572 sequences, 304954 letters. +Total time = 0.018s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Atps/Original/Concatenated/Sr_rh_Atps.AA.Concatenated.dmnd +Sequences = 1572 +Letters = 304954 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.002s] +Masking queries... [0.009s] +Building query seed set... [0.003s] +Algorithm: Double-indexed +Building query histograms... [0.003s] +Allocating buffers... [0s] +Loading reference sequences... [0.001s] +Masking reference... [0.004s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.003s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.014s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.007s] +Computing hash join... [0.002s] +Building seed filter... [0.005s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.007s] +Building query seed array... [0.004s] +Computing hash join... [0.001s] +Building seed filter... [0.003s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.007s] +Building query seed array... [0.006s] +Computing hash join... [0.002s] +Building seed filter... [0.005s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.006s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... 
[0.005s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.013s] +Building query seed array... [0.007s] +Computing hash join... [0.001s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.007s] +Building query seed array... [0.006s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.006s] +Building query seed array... [0.003s] +Computing hash join... [0.001s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Deallocating buffers... [0s] +Computing alignments... [0.031s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.307s +Reported 2855 pairwise alignments, 2855 HSPs. +490 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Atps at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Atps + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Atps + + +There were 1572 ORFs originally, with 39 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 1572 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 119 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 1065 Unique ORFs for Sr_rh_Atps + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/Original/Concatenated/Sr_rh_Calb.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.026s] +Masking sequences... [0.024s] +Writing sequences... [0.009s] +Hashing sequences... [0.003s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = c31e6238b80f6551ff9fba31e8253bf7 +Processed 10532 sequences, 3924177 letters. +Total time = 0.067s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Calb/Original/Concatenated/Sr_rh_Calb.AA.Concatenated.dmnd +Sequences = 10532 +Letters = 3924177 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.022s] +Masking queries... [0.024s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.01s] +Masking reference... [0.013s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.012s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.012s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.013s] +Building query seed array... [0.016s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.013s] +Computing hash join... 
[0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Deallocating buffers... [0s] +Computing alignments... [0.38s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.027s +Reported 89461 pairwise alignments, 89467 HSPs. +8205 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Calb at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Calb + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Calb + + +There were 10532 ORFs originally, with 339 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 10532 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 350 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3460 Unique ORFs for Sr_rh_Calb + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/Original/Concatenated/Sr_rh_Gsp1.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.018s] +Masking sequences... [0.021s] +Writing sequences... [0.005s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = e29604468b49ff25602af34bf175d55b +Processed 9075 sequences, 3395806 letters. +Total time = 0.049s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gsp1/Original/Concatenated/Sr_rh_Gsp1.AA.Concatenated.dmnd +Sequences = 9075 +Letters = 3395806 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.018s] +Masking queries... [0.021s] +Building query seed set... [0.023s] +Algorithm: Double-indexed +Building query histograms... [0.009s] +Allocating buffers... [0s] +Loading reference sequences... [0.01s] +Masking reference... [0.013s] +Initializing temporary storage... [0.008s] +Building reference histograms... [0.006s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.031s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.015s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.008s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.009s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.012s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.02s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.007s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Deallocating buffers... [0s] +Computing alignments... [0.272s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.859s +Reported 57494 pairwise alignments, 57495 HSPs. +6675 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Gsp1 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Gsp1 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Gsp1 + + +There were 9075 ORFs originally, with 410 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 9075 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 386 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3393 Unique ORFs for Sr_rh_Gsp1 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/Original/Concatenated/Sr_rh_Nn08.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.022s] +Masking sequences... [0.026s] +Writing sequences... [0.007s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 5aaed5bdda45faa3943131907b7845d5 +Processed 15500 sequences, 3990841 letters. +Total time = 0.062s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn08/Original/Concatenated/Sr_rh_Nn08.AA.Concatenated.dmnd +Sequences = 15500 +Letters = 3990841 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.024s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.017s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.015s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.158s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.023s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.124s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.022s] +Building query seed array... [0.016s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.113s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... 
[0.109s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.017s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.102s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.021s] +Building query seed array... [0.012s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.101s] +Deallocating buffers... [0s] +Computing alignments... [0.615s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.065s +Reported 166114 pairwise alignments, 166116 HSPs. +12320 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn08 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn08 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn08 + + +There were 15500 ORFs originally, with 2616 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 15500 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1059 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4023 Unique ORFs for Sr_rh_Nn08 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/Original/Concatenated/Sr_rh_Nn04.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.048s] +Masking sequences... [0.037s] +Writing sequences... [0.015s] +Hashing sequences... [0.004s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 95f78e5fe98e9036e94658c2ba4f58da +Processed 36674 sequences, 9173466 letters. +Total time = 0.11s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn04/Original/Concatenated/Sr_rh_Nn04.AA.Concatenated.dmnd +Sequences = 36674 +Letters = 9173466 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.049s] +Masking queries... [0.034s] +Building query seed set... [0.039s] +Algorithm: Double-indexed +Building query histograms... [0.021s] +Allocating buffers... [0s] +Loading reference sequences... [0.025s] +Masking reference... [0.029s] +Initializing temporary storage... [0.01s] +Building reference histograms... 
[0.018s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.019s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... [0.483s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.022s] +Building query seed array... [0.016s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.347s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.025s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.375s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.021s] +Building query seed array... [0.013s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.345s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.02s] +Building query seed array... [0.011s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.347s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.023s] +Building query seed array... [0.016s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.298s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.026s] +Building query seed array... [0.02s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.307s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.02s] +Building query seed array... [0.011s] +Computing hash join... 
[0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.29s] +Deallocating buffers... [0.002s] +Computing alignments... [1.939s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 5.355s +Reported 475363 pairwise alignments, 475381 HSPs. +31089 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn04 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn04 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn04 + + +There were 36674 ORFs originally, with 7073 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 36674 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 3043 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 6419 Unique ORFs for Sr_rh_Nn04 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/Original/Concatenated/Sr_rh_Bv01.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.027s] +Masking sequences... [0.027s] +Writing sequences... [0.009s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 32762a5b753a4aa6704d518df14f98b0 +Processed 22250 sequences, 5423683 letters. +Total time = 0.07s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv01/Original/Concatenated/Sr_rh_Bv01.AA.Concatenated.dmnd +Sequences = 22250 +Letters = 5423683 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.026s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.017s] +Masking reference... [0.016s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.011s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.02s] +Building query seed array... [0.018s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.05s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... 
[0.047s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.046s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.017s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.045s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.016s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.044s] +Deallocating buffers... [0s] +Computing alignments... [0.694s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.582s +Reported 210419 pairwise alignments, 210421 HSPs. +17900 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv01 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv01 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv01 + + +There were 22250 ORFs originally, with 925 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 22250 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1761 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5198 Unique ORFs for Sr_rh_Bv01 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/Original/Concatenated/Sr_rh_Nn05.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.026s] +Masking sequences... [0.031s] +Writing sequences... [0.009s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 53344b07d607f88908ffa47877c05848 +Processed 17028 sequences, 5371066 letters. +Total time = 0.073s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn05/Original/Concatenated/Sr_rh_Nn05.AA.Concatenated.dmnd +Sequences = 17028 +Letters = 5371066 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.028s] +Masking queries... [0.026s] +Building query seed set... [0.032s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.013s] +Masking reference... [0.019s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.017s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.015s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.064s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.022s] +Building query seed array... [0.025s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.065s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.056s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... 
[0.059s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.053s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.021s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.018s] +Building query seed array... [0.02s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.053s] +Deallocating buffers... [0s] +Computing alignments... [0.573s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.605s +Reported 136247 pairwise alignments, 136251 HSPs. +13401 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn05 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn05 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn05 + + +There were 17028 ORFs originally, with 1026 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 17028 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1409 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4799 Unique ORFs for Sr_rh_Nn05 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/Original/Concatenated/Sr_rh_Gl03.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.052s] +Masking sequences... [0.051s] +Writing sequences... [0.016s] +Hashing sequences... [0.003s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 8c5853401a6b05601e71e4bc98b8ba39 +Processed 32752 sequences, 10207231 letters. +Total time = 0.128s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl03/Original/Concatenated/Sr_rh_Gl03.AA.Concatenated.dmnd +Sequences = 32752 +Letters = 10207231 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.059s] +Masking queries... [0.036s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.022s] +Allocating buffers... [0s] +Loading reference sequences... [0.028s] +Masking reference... [0.025s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.019s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.026s] +Building query seed array... [0.016s] +Computing hash join... [0.007s] +Building seed filter... [0.004s] +Searching alignments... [0.1s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.021s] +Building query seed array... [0.014s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.09s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.015s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.015s] +Building query seed array... [0.017s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.082s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.015s] +Building query seed array... [0.015s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.019s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.07s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.022s] +Building query seed array... [0.018s] +Computing hash join... [0.005s] +Building seed filter... [0.005s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.014s] +Computing hash join... 
[0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.067s] +Deallocating buffers... [0.001s] +Computing alignments... [1.422s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.681s +Reported 371577 pairwise alignments, 371643 HSPs. +30596 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Gl03 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Gl03 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Gl03 + + +There were 32752 ORFs originally, with 835 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 32752 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1335 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4863 Unique ORFs for Sr_rh_Gl03 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/Original/Concatenated/Sr_rh_Emar.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.016s] +Masking sequences... [0.022s] +Writing sequences... [0.005s] +Hashing sequences... [0s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 0377bff5727d8ea0665197857dd36934 +Processed 7943 sequences, 3401675 letters. +Total time = 0.048s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Emar/Original/Concatenated/Sr_rh_Emar.AA.Concatenated.dmnd +Sequences = 7943 +Letters = 3401675 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.017s] +Masking queries... [0.019s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.009s] +Masking reference... [0.017s] +Initializing temporary storage... [0.008s] +Building reference histograms... [0.009s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.03s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.006s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.029s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.012s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... 
[0.022s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.011s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.024s] +Deallocating buffers... [0s] +Computing alignments... [0.248s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.847s +Reported 45208 pairwise alignments, 45211 HSPs. +5967 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Emar at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Emar + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Emar + + +There were 7943 ORFs originally, with 828 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 7943 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 195 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 3036 Unique ORFs for Sr_rh_Emar + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/Original/Concatenated/Sr_rh_Nn02.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.028s] +Masking sequences... [0.026s] +Writing sequences... [0.009s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = fafb09591ab6392c934e41fce1e5cc78 +Processed 18186 sequences, 5834132 letters. +Total time = 0.07s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn02/Original/Concatenated/Sr_rh_Nn02.AA.Concatenated.dmnd +Sequences = 18186 +Letters = 5834132 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.024s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.017s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.01s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.021s] +Building query seed array... [0.018s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.183s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.026s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.121s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.025s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.125s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.026s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.112s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.021s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.142s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.027s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.12s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.023s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.16s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.025s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.147s] +Deallocating buffers... [0.001s] +Computing alignments... [0.831s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.506s +Reported 179318 pairwise alignments, 179321 HSPs. +14504 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn02 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn02 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn02 + + +There were 18186 ORFs originally, with 1966 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 18186 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1352 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4670 Unique ORFs for Sr_rh_Nn02 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/Original/Concatenated/Sr_rh_Nn10.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.035s] +Masking sequences... [0.032s] +Writing sequences... [0.01s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 2c8f05bd448e81fa11fb1f390a5a1b16 +Processed 20167 sequences, 6887392 letters. +Total time = 0.085s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn10/Original/Concatenated/Sr_rh_Nn10.AA.Concatenated.dmnd +Sequences = 20167 +Letters = 6887392 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.04s] +Masking queries... [0.025s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.022s] +Masking reference... [0.021s] +Initializing temporary storage... [0.011s] +Building reference histograms... 
[0.015s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.02s] +Building query seed array... [0.011s] +Computing hash join... [0.004s] +Building seed filter... [0.005s] +Searching alignments... [0.213s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.021s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.171s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.017s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.164s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.017s] +Building query seed array... [0.009s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.156s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.01s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.151s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.128s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.174s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.019s] +Building query seed array... [0.011s] +Computing hash join... 
[0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.14s] +Deallocating buffers... [0.001s] +Computing alignments... [1.167s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.983s +Reported 236210 pairwise alignments, 236216 HSPs. +16852 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn10 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn10 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn10 + + +There were 20167 ORFs originally, with 2853 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 20167 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1214 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4580 Unique ORFs for Sr_rh_Nn10 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/Original/Concatenated/Sr_rh_Nn03.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.035s] +Masking sequences... [0.028s] +Writing sequences... [0.009s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 103564f9e7dd2f1f2bec1ec8db24c811 +Processed 17953 sequences, 5742700 letters. +Total time = 0.079s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn03/Original/Concatenated/Sr_rh_Nn03.AA.Concatenated.dmnd +Sequences = 17953 +Letters = 5742700 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.031s] +Masking queries... [0.026s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.013s] +Allocating buffers... [0s] +Loading reference sequences... [0.016s] +Masking reference... [0.017s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.012s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.022s] +Building query seed array... [0.017s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.095s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.02s] +Building query seed array... [0.024s] +Computing hash join... [0.004s] +Building seed filter... [0.007s] +Searching alignments... [0.079s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.024s] +Building query seed array... [0.023s] +Computing hash join... [0.005s] +Building seed filter... [0.007s] +Searching alignments... 
[0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.076s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.065s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.061s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.075s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.021s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.068s] +Deallocating buffers... [0s] +Computing alignments... [0.684s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.832s +Reported 154410 pairwise alignments, 154413 HSPs. +14138 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn03 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn03 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn03 + + +There were 17953 ORFs originally, with 1458 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 17953 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1522 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4771 Unique ORFs for Sr_rh_Nn03 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/Original/Concatenated/Sr_rh_Nn06.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.032s] +Masking sequences... [0.029s] +Writing sequences... [0.01s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 9d688130f29a63e4d67352cdb9f99ac1 +Processed 19812 sequences, 6063731 letters. +Total time = 0.077s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn06/Original/Concatenated/Sr_rh_Nn06.AA.Concatenated.dmnd +Sequences = 19812 +Letters = 6063731 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.032s] +Masking queries... [0.029s] +Building query seed set... [0.035s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.02s] +Masking reference... [0.018s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.011s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.139s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.017s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.103s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.102s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.012s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.09s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.01s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... 
[0.107s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.093s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.081s] +Deallocating buffers... [0s] +Computing alignments... [0.802s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.059s +Reported 193966 pairwise alignments, 193972 HSPs. +16000 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn06 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn06 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn06 + + +There were 19812 ORFs originally, with 1949 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 19812 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1501 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4947 Unique ORFs for Sr_rh_Nn06 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/Original/Concatenated/Sr_rh_Bv02.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.034s] +Masking sequences... [0.029s] +Writing sequences... [0.011s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 96b1fdedb1bc7f9735f8d6a9c060b359 +Processed 24278 sequences, 6856585 letters. +Total time = 0.081s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv02/Original/Concatenated/Sr_rh_Bv02.AA.Concatenated.dmnd +Sequences = 24278 +Letters = 6856585 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.047s] +Masking queries... [0.031s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.02s] +Allocating buffers... [0s] +Loading reference sequences... [0.021s] +Masking reference... [0.02s] +Initializing temporary storage... [0.01s] +Building reference histograms... 
[0.01s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.016s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.088s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.024s] +Building query seed array... [0.027s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.077s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.071s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.019s] +Building query seed array... [0.026s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.014s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.066s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.023s] +Building query seed array... [0.02s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.062s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.02s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.06s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.018s] +Building query seed array... [0.02s] +Computing hash join... 
[0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.058s] +Deallocating buffers... [0.001s] +Computing alignments... [1.001s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.159s +Reported 274535 pairwise alignments, 274572 HSPs. +20473 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv02 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv02 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv02 + + +There were 24278 ORFs originally, with 1366 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 24278 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1535 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5142 Unique ORFs for Sr_rh_Bv02 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/Original/Concatenated/Sr_rh_Gl02.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.055s] +Masking sequences... [0.042s] +Writing sequences... [0.017s] +Hashing sequences... [0.003s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 9fd3ebe1fc02a1cc4b69bb7b2be9a35d +Processed 31505 sequences, 11021837 letters. +Total time = 0.123s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl02/Original/Concatenated/Sr_rh_Gl02.AA.Concatenated.dmnd +Sequences = 31505 +Letters = 11021837 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.057s] +Masking queries... [0.034s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.021s] +Allocating buffers... [0s] +Loading reference sequences... [0.028s] +Masking reference... [0.028s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.02s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.026s] +Building query seed array... [0.019s] +Computing hash join... [0.008s] +Building seed filter... [0.003s] +Searching alignments... [0.108s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.017s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.017s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... 
[0.08s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.014s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.089s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.012s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.081s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.013s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.076s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.014s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.016s] +Computing hash join... [0.006s] +Building seed filter... [0.004s] +Searching alignments... [0.078s] +Deallocating buffers... [0.001s] +Computing alignments... [1.574s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.849s +Reported 351084 pairwise alignments, 351138 HSPs. +29361 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Gl02 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Gl02 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Gl02 + + +There were 31505 ORFs originally, with 925 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 31505 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1258 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4912 Unique ORFs for Sr_rh_Gl02 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/Original/Concatenated/Sr_rh_Esca.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.018s] +Masking sequences... [0.125s] +Writing sequences... [0.005s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = a6ba9de57b7334a769b88bd0f22acf5b +Processed 11092 sequences, 3356704 letters. +Total time = 0.154s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Esca/Original/Concatenated/Sr_rh_Esca.AA.Concatenated.dmnd +Sequences = 11092 +Letters = 3356704 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.021s] +Masking queries... [0.021s] +Building query seed set... [0.021s] +Algorithm: Double-indexed +Building query histograms... [0.01s] +Allocating buffers... [0s] +Loading reference sequences... [0.012s] +Masking reference... [0.013s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.007s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.036s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.013s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.01s] +Building query seed array... [0.013s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.01s] +Building query seed array... [0.012s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... 
[0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.011s] +Building query seed array... [0.009s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.021s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.011s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.022s] +Deallocating buffers... [0s] +Computing alignments... [0.27s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.867s +Reported 80939 pairwise alignments, 80941 HSPs. +8644 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Esca at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Esca + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Esca + + +There were 11092 ORFs originally, with 697 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 11092 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 491 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 3480 Unique ORFs for Sr_rh_Esca + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/Original/Concatenated/Sr_rh_Bv03.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.032s] +Masking sequences... [0.028s] +Writing sequences... [0.011s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 7d2fb8d327ed29a308fc6adf9481fe27 +Processed 23147 sequences, 6331934 letters. +Total time = 0.079s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv03/Original/Concatenated/Sr_rh_Bv03.AA.Concatenated.dmnd +Sequences = 23147 +Letters = 6331934 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.033s] +Masking queries... [0.026s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.019s] +Allocating buffers... [0s] +Loading reference sequences... [0.017s] +Masking reference... [0.02s] +Initializing temporary storage... [0.01s] +Building reference histograms... 
[0.012s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.018s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.084s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.074s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.019s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.07s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.018s] +Building query seed array... [0.021s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.067s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.021s] +Building query seed array... [0.021s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.059s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.058s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.019s] +Building query seed array... [0.019s] +Computing hash join... 
[0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.059s] +Deallocating buffers... [0.001s] +Computing alignments... [0.952s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.056s +Reported 269959 pairwise alignments, 269963 HSPs. +19405 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv03 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv03 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv03 + + +There were 23147 ORFs originally, with 1552 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 23147 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1372 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5015 Unique ORFs for Sr_rh_Bv03 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/Original/Concatenated/Sr_rh_Sspa.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.026s] +Masking sequences... [0.025s] +Writing sequences... [0.007s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 73ca9ebb7bc1f11c92eff313afe8d09e +Processed 15066 sequences, 4394283 letters. +Total time = 0.064s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Sspa/Original/Concatenated/Sr_rh_Sspa.AA.Concatenated.dmnd +Sequences = 15066 +Letters = 4394283 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.025s] +Masking queries... [0.022s] +Building query seed set... [0.031s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.017s] +Initializing temporary storage... [0.011s] +Building reference histograms... [0.012s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.017s] +Building query seed array... [0.023s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.032s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.012s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.028s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.027s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.011s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.024s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.011s] +Building query seed array... [0.013s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.025s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.026s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.023s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... [0.027s] +Deallocating buffers... [0s] +Computing alignments... [0.306s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.987s +Reported 81557 pairwise alignments, 81557 HSPs. +11553 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Sspa at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Sspa + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Sspa + + +There were 15066 ORFs originally, with 934 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 15066 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 955 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5007 Unique ORFs for Sr_rh_Sspa + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/Original/Concatenated/Sr_rh_Halb.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.028s] +Masking sequences... [0.027s] +Writing sequences... [0.009s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = be446b493c85f79b57f24c5868ac0c4c +Processed 14869 sequences, 6087712 letters. +Total time = 0.071s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Halb/Original/Concatenated/Sr_rh_Halb.AA.Concatenated.dmnd +Sequences = 14869 +Letters = 6087712 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.029s] +Masking queries... [0.024s] +Building query seed set... [0.027s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.015s] +Masking reference... [0.018s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.013s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.086s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.075s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.017s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.062s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.065s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.022s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.059s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.014s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.02s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.062s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.056s] +Deallocating buffers... [0s] +Computing alignments... [0.649s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.682s +Reported 153389 pairwise alignments, 153474 HSPs. +13522 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Halb at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Halb + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Halb + + +There were 14869 ORFs originally, with 3233 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 14869 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 309 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 2652 Unique ORFs for Sr_rh_Halb + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/Original/Concatenated/Sr_rh_Bv04.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.033s] +Masking sequences... [0.03s] +Writing sequences... [0.011s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 7d1e6c82caa1170cabd2b9c12a903852 +Processed 21474 sequences, 6727927 letters. +Total time = 0.082s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Bv04/Original/Concatenated/Sr_rh_Bv04.AA.Concatenated.dmnd +Sequences = 21474 +Letters = 6727927 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.04s] +Masking queries... [0.03s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.016s] +Allocating buffers... [0s] +Loading reference sequences... [0.018s] +Masking reference... [0.021s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.014s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.018s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.081s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.027s] +Building query seed array... [0.02s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.07s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.02s] +Building query seed array... [0.021s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.068s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.017s] +Building query seed array... [0.019s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.057s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.018s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.063s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.019s] +Building query seed array... [0.022s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.072s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.019s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.051s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.015s] +Building query seed array... [0.018s] +Computing hash join... 
[0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.055s] +Deallocating buffers... [0s] +Computing alignments... [1.096s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.197s +Reported 262985 pairwise alignments, 263041 HSPs. +18915 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Bv04 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Bv04 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Bv04 + + +There were 21474 ORFs originally, with 1156 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 21474 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 848 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 4346 Unique ORFs for Sr_rh_Bv04 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/Original/Concatenated/Sr_rh_Nn01.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.034s] +Masking sequences... [0.031s] +Writing sequences... [0.01s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = d53a7002a89335cfb90dd79e5cf900d7 +Processed 22715 sequences, 6753119 letters. +Total time = 0.083s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn01/Original/Concatenated/Sr_rh_Nn01.AA.Concatenated.dmnd +Sequences = 22715 +Letters = 6753119 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.036s] +Masking queries... [0.029s] +Building query seed set... [0.036s] +Algorithm: Double-indexed +Building query histograms... [0.014s] +Allocating buffers... [0s] +Loading reference sequences... [0.019s] +Masking reference... [0.02s] +Initializing temporary storage... [0.01s] +Building reference histograms... [0.013s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.012s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.214s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.022s] +Building query seed array... [0.014s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.154s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.018s] +Building query seed array... [0.016s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... 
[0.173s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.018s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.15s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.016s] +Building query seed array... [0.013s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.149s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.014s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.123s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.142s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.014s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.132s] +Deallocating buffers... [0.001s] +Computing alignments... [0.996s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.749s +Reported 234923 pairwise alignments, 234933 HSPs. +18619 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn01 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn01 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn01 + + +There were 22715 ORFs originally, with 2722 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 22715 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1826 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5092 Unique ORFs for Sr_rh_Nn01 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/Original/Concatenated/Sr_rh_Tx01.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.035s] +Masking sequences... [0.03s] +Writing sequences... [0.011s] +Hashing sequences... [0.002s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 5c42afa531e715892722c3aab558955f +Processed 19985 sequences, 6911292 letters. +Total time = 0.084s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Tx01/Original/Concatenated/Sr_rh_Tx01.AA.Concatenated.dmnd +Sequences = 19985 +Letters = 6911292 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.035s] +Masking queries... [0.027s] +Building query seed set... [0.029s] +Algorithm: Double-indexed +Building query histograms... [0.015s] +Allocating buffers... [0s] +Loading reference sequences... [0.019s] +Masking reference... [0.02s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.014s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.018s] +Building query seed array... [0.01s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.123s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.104s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.015s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.099s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.096s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... 
[0.094s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.013s] +Building query seed array... [0.011s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.088s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.014s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.098s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.013s] +Building query seed array... [0.012s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.099s] +Deallocating buffers... [0.001s] +Computing alignments... [0.886s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 2.149s +Reported 249989 pairwise alignments, 250001 HSPs. +18701 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Tx01 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Tx01 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Tx01 + + +There were 19985 ORFs originally, with 5250 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 19985 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 353 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 3027 Unique ORFs for Sr_rh_Tx01 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/Original/Concatenated/Sr_rh_Nn09.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.043s] +Masking sequences... [0.034s] +Writing sequences... [0.014s] +Hashing sequences... [0.003s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 442d2660d19d6736d72315a56ba66c98 +Processed 30042 sequences, 8360487 letters. +Total time = 0.1s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn09/Original/Concatenated/Sr_rh_Nn09.AA.Concatenated.dmnd +Sequences = 30042 +Letters = 8360487 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.055s] +Masking queries... [0.036s] +Building query seed set... [0.034s] +Algorithm: Double-indexed +Building query histograms... [0.022s] +Allocating buffers... [0s] +Loading reference sequences... [0.023s] +Masking reference... [0.023s] +Initializing temporary storage... [0.009s] +Building reference histograms... 
[0.016s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.023s] +Building query seed array... [0.016s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.342s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.018s] +Building query seed array... [0.012s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.283s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.019s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.298s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.02s] +Building query seed array... [0.016s] +Computing hash join... [0.005s] +Building seed filter... [0.003s] +Searching alignments... [0.243s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.019s] +Building query seed array... [0.012s] +Computing hash join... [0.005s] +Building seed filter... [0.004s] +Searching alignments... [0.269s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.023s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.225s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.021s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.251s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.022s] +Building query seed array... [0.012s] +Computing hash join... 
[0.004s] +Building seed filter... [0.004s] +Searching alignments... [0.239s] +Deallocating buffers... [0.001s] +Computing alignments... [1.535s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0.001s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 4.28s +Reported 355048 pairwise alignments, 355055 HSPs. +25203 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn09 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn09 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn09 + + +There were 30042 ORFs originally, with 4549 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 30042 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 2368 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 5956 Unique ORFs for Sr_rh_Nn09 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/Original/Concatenated/Sr_rh_Gl01.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.003s] +Masking sequences... [0.016s] +Writing sequences... [0s] +Hashing sequences... [0s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... 
[0s] +Database hash = 527bc5ca7392445b7844cb181d0620f1 +Processed 1224 sequences, 170624 letters. +Total time = 0.023s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/Original/Concatenated/SpreadSheets +Opening the database... [0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Gl01/Original/Concatenated/Sr_rh_Gl01.AA.Concatenated.dmnd +Sequences = 1224 +Letters = 170624 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.001s] +Masking queries... [0.014s] +Building query seed set... [0.004s] +Algorithm: Double-indexed +Building query histograms... [0.002s] +Allocating buffers... [0s] +Loading reference sequences... [0.001s] +Masking reference... [0.007s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.003s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.013s] +Building query seed array... [0.014s] +Computing hash join... [0.002s] +Building seed filter... [0.007s] +Searching alignments... [0.007s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.008s] +Building query seed array... [0.004s] +Computing hash join... [0.002s] +Building seed filter... [0.004s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.007s] +Computing hash join... [0.003s] +Building seed filter... [0.005s] +Searching alignments... 
[0.005s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.009s] +Building query seed array... [0.009s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.005s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.011s] +Building query seed array... [0.005s] +Computing hash join... [0.001s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.011s] +Building query seed array... [0.008s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.007s] +Building query seed array... [0.006s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.009s] +Building query seed array... [0.006s] +Computing hash join... [0.002s] +Building seed filter... [0.003s] +Searching alignments... [0.004s] +Deallocating buffers... [0s] +Computing alignments... [0.036s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0.001s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 0.331s +Reported 4157 pairwise alignments, 4157 HSPs. +594 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Gl01 at 98% identity. 
+ + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Gl01 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Gl01 + + +There were 1224 ORFs originally, with 32 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 1224 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 44 additional ORFs +that were either TOO LONG or SHORT. + + +Overall, there are 667 Unique ORFs for Sr_rh_Gl01 + + +Next Script is: 6b_update_cov_post_removepartials.py + + +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 64 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Database file: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/Original/Concatenated/Sr_rh_Nn14.AA.Concatenated.fasta +Opening the database file... [0.002s] +Loading sequences... [0.028s] +Masking sequences... [0.027s] +Writing sequences... [0.01s] +Hashing sequences... [0.001s] +Loading sequences... [0s] +Writing trailer... [0s] +Closing the input file... [0s] +Closing the database file... [0s] +Database hash = 2cefd71c2b2dfc7c97c47bb5141899b8 +Processed 17441 sequences, 5176642 letters. +Total time = 0.071s +diamond v0.9.30.131 (C) Max Planck Society for the Advancement of Science +Documentation, support and updates available at http://www.diamondsearch.org + +#CPU threads: 60 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +Temporary directory: /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/Original/Concatenated/SpreadSheets +Opening the database... 
[0.001s] +#Target sequences to report alignments for: 25 +Reference = /beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams//Output/Sr_rh_Nn14/Original/Concatenated/Sr_rh_Nn14.AA.Concatenated.dmnd +Sequences = 17441 +Letters = 5176642 +Block size = 2000000000 +Opening the input file... [0s] +Opening the output file... [0s] +Loading query sequences... [0.027s] +Masking queries... [0.025s] +Building query seed set... [0.033s] +Algorithm: Double-indexed +Building query histograms... [0.012s] +Allocating buffers... [0s] +Loading reference sequences... [0.014s] +Masking reference... [0.017s] +Initializing temporary storage... [0.009s] +Building reference histograms... [0.015s] +Allocating buffers... [0s] +Processing query block 0, reference block 0, shape 0, index chunk 0. +Building reference seed array... [0.021s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.055s] +Processing query block 0, reference block 0, shape 0, index chunk 1. +Building reference seed array... [0.014s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 0, index chunk 2. +Building reference seed array... [0.015s] +Building query seed array... [0.015s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.052s] +Processing query block 0, reference block 0, shape 0, index chunk 3. +Building reference seed array... [0.019s] +Building query seed array... [0.017s] +Computing hash join... [0.004s] +Building seed filter... [0.003s] +Searching alignments... [0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 0. +Building reference seed array... [0.014s] +Building query seed array... [0.02s] +Computing hash join... [0.004s] +Building seed filter... [0.005s] +Searching alignments... 
[0.042s] +Processing query block 0, reference block 0, shape 1, index chunk 1. +Building reference seed array... [0.016s] +Building query seed array... [0.017s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.038s] +Processing query block 0, reference block 0, shape 1, index chunk 2. +Building reference seed array... [0.016s] +Building query seed array... [0.018s] +Computing hash join... [0.003s] +Building seed filter... [0.004s] +Searching alignments... [0.04s] +Processing query block 0, reference block 0, shape 1, index chunk 3. +Building reference seed array... [0.017s] +Building query seed array... [0.016s] +Computing hash join... [0.003s] +Building seed filter... [0.003s] +Searching alignments... [0.041s] +Deallocating buffers... [0s] +Computing alignments... [0.496s] +Deallocating reference... [0s] +Loading reference sequences... [0s] +Deallocating buffers... [0s] +Deallocating queries... [0s] +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database file... [0s] +Deallocating taxonomy... [0s] +Total time = 1.358s +Reported 124839 pairwise alignments, 124844 HSPs. +13517 queries aligned. + + +Merging Transcriptome data together. + + +Binning ALL Nucleotide ORFs for Sr_rh_Nn14 at 98% identity. + + + + +Removing Partial ORFs with >98% Nucleotide Identity over >70% of +their length when compared to more complete ORFs from: Sr_rh_Nn14 + + +Removing Abnormally Short (70% length) OR Long (200% length) ORFs +compared to typical Gene Family member length for: Sr_rh_Nn14 + + +There were 17441 ORFs originally, with 663 Partial ORFs that +were 100% Identical to larger ORFs. + + +Of the 17441 original ORFs, 0 are Partial ORFs (e.g. > 98% +NUCLEOTIDE identity) to larger ORFs with 1588 additional ORFs +that were either TOO LONG or SHORT. 
+ + +Overall, there are 4906 Unique ORFs for Sr_rh_Nn14 + + +Next Script is: 6b_update_cov_post_removepartials.py + + + + +Renaming Sr_rh_Esca's files with the following 10-character +code: Sr_rh_Esca + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Esca_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Esca files can be +found in the Sr_rh_Esca and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn05's files with the following 10-character +code: Sr_rh_Nn05 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn05_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn05 files can be +found in the Sr_rh_Nn05 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Bv04's files with the following 10-character +code: Sr_rh_Bv04 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv04_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv04 files can be +found in the Sr_rh_Bv04 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn10's files with the following 10-character +code: Sr_rh_Nn10 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn10_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn10 files can be +found in the Sr_rh_Nn10 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Bv03's files with the following 10-character +code: Sr_rh_Bv03 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv03_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv03 files can be +found in the Sr_rh_Bv03 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn14's files with the following 10-character +code: Sr_rh_Nn14 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn14_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn14 files can be +found in the Sr_rh_Nn14 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn03's files with the following 10-character +code: Sr_rh_Nn03 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn03_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn03 files can be +found in the Sr_rh_Nn03 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Bv02's files with the following 10-character +code: Sr_rh_Bv02 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv02_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv02 files can be +found in the Sr_rh_Bv02 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Nn07's files with the following 10-character +code: Sr_rh_Nn07 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn07_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn07 files can be +found in the Sr_rh_Nn07 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Hhir's files with the following 10-character +code: Sr_rh_Hhir + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Hhir_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Hhir files can be +found in the Sr_rh_Hhir and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Gl01's files with the following 10-character +code: Sr_rh_Gl01 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Gl01_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Gl01 files can be +found in the Sr_rh_Gl01 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Gsp1's files with the following 10-character +code: Sr_rh_Gsp1 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Gsp1_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Gsp1 files can be +found in the Sr_rh_Gsp1 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Rsp1's files with the following 10-character +code: Sr_rh_Rsp1 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Rsp1_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Rsp1 files can be +found in the Sr_rh_Rsp1 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Calb's files with the following 10-character +code: Sr_rh_Calb + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Calb_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Calb files can be +found in the Sr_rh_Calb and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn09's files with the following 10-character +code: Sr_rh_Nn09 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn09_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn09 files can be +found in the Sr_rh_Nn09 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn08's files with the following 10-character +code: Sr_rh_Nn08 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn08_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn08 files can be +found in the Sr_rh_Nn08 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Nn01's files with the following 10-character +code: Sr_rh_Nn01 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn01_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn01 files can be +found in the Sr_rh_Nn01 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Bv06's files with the following 10-character +code: Sr_rh_Bv06 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv06_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv06 files can be +found in the Sr_rh_Bv06 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn02's files with the following 10-character +code: Sr_rh_Nn02 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn02_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn02 files can be +found in the Sr_rh_Nn02 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Bv05's files with the following 10-character +code: Sr_rh_Bv05 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv05_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv05 files can be +found in the Sr_rh_Bv05 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Sspa's files with the following 10-character +code: Sr_rh_Sspa + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Sspa_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Sspa files can be +found in the Sr_rh_Sspa and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Hind's files with the following 10-character +code: Sr_rh_Hind + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Hind_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Hind files can be +found in the Sr_rh_Hind and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn12's files with the following 10-character +code: Sr_rh_Nn12 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn12_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn12 files can be +found in the Sr_rh_Nn12 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Gspa's files with the following 10-character +code: Sr_rh_Gspa + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Gspa_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Gspa files can be +found in the Sr_rh_Gspa and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Bv01's files with the following 10-character +code: Sr_rh_Bv01 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Bv01_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Bv01 files can be +found in the Sr_rh_Bv01 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Tx01's files with the following 10-character +code: Sr_rh_Tx01 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Tx01_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Tx01 files can be +found in the Sr_rh_Tx01 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn06's files with the following 10-character +code: Sr_rh_Nn06 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn06_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn06 files can be +found in the Sr_rh_Nn06 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn04's files with the following 10-character +code: Sr_rh_Nn04 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn04_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn04 files can be +found in the Sr_rh_Nn04 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Gl02's files with the following 10-character +code: Sr_rh_Gl02 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Gl02_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Gl02 files can be +found in the Sr_rh_Gl02 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Slin's files with the following 10-character +code: Sr_rh_Slin + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Slin_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Slin files can be +found in the Sr_rh_Slin and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Emar's files with the following 10-character +code: Sr_rh_Emar + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Emar_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Emar files can be +found in the Sr_rh_Emar and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn13's files with the following 10-character +code: Sr_rh_Nn13 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn13_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn13 files can be +found in the Sr_rh_Nn13 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Emac's files with the following 10-character +code: Sr_rh_Emac + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Emac_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Emac files can be +found in the Sr_rh_Emac and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Halb's files with the following 10-character +code: Sr_rh_Halb + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Halb_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Halb files can be +found in the Sr_rh_Halb and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Atps's files with the following 10-character +code: Sr_rh_Atps + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Atps_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Atps files can be +found in the Sr_rh_Atps and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Nn11's files with the following 10-character +code: Sr_rh_Nn11 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Nn11_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Nn11 files can be +found in the Sr_rh_Nn11 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! 
+ + + + +Renaming Sr_rh_Gl03's files with the following 10-character +code: Sr_rh_Gl03 + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Gl03_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Gl03 files can be +found in the Sr_rh_Gl03 and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + + + +Renaming Sr_rh_Usac's files with the following 10-character +code: Sr_rh_Usac + + +Renaming Translated (Protein) ORFs + + +Renaming Nucleotide ORFs + + +Updating CDS Names in the Spreadsheet + + +Converting Sr_rh_Usac_Filtered.UC.Final.AA.ORF.fasta to XML format + + +There is no next script! The final Sr_rh_Usac files can be +found in the Sr_rh_Usac and ReadyToGo folders and are ready +for the KatzLab Phylogenomic Tree-Building Steps! + + +Traceback (most recent call last): + File "8_SummaryStats.py", line 8, in + import matplotlib.pyplot as plt +ModuleNotFoundError: No module named 'matplotlib' + +Running script 1... + + +Running script 2... + + +Running script 3... + + +Running script 4... + + +Running script 5... + + +Running script 6... + + +Running script 7... 
+ diff --git a/PTL1/Transcriptomes/Scripts/UnexpexctedShortStuffBlameXyrus.txt b/PTL1/Transcriptomes/Scripts/UnexpexctedShortStuffBlameXyrus.txt new file mode 100644 index 0000000..4ce2676 --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/UnexpexctedShortStuffBlameXyrus.txt @@ -0,0 +1,143 @@ +Contig_204_Len5938_Cov7976_P_OG6_102770 +Contig_3323_Len1201_Cov3_U_OG6_148486 +Contig_313_Len4904_Cov3_E_OG6_101730 +Contig_3269_Len1219_Cov2_U_OG6_102957 +Contig_9180_Len427_Cov1_P_OG6_100774 +Contig_4698_Len874_Cov1_U_OG6_111564 +Contig_6408_Len634_Cov2_U_OG6_102446 +Contig_667_Len3323_Cov11_U_OG6_116443 +Contig_2271_Len1620_Cov1_E_OG6_101536 +Contig_4504_Len909_Cov11_P_OG6_101629 +Contig_333_Len4757_Cov12241_P_OG6_102770 +Contig_811_Len2973_Cov4_U_OG6_116443 +Contig_4707_Len871_Cov7779_P_OG6_102770 +Contig_5488_Len751_Cov2_U_OG6_103872 +Contig_317_Len4879_Cov6_E_OG6_107629 +Contig_5624_Len732_Cov1_U_OG6_100648 +Contig_1895_Len1829_Cov9_U_OG6_107629 +Contig_6636_Len610_Cov1_P_OG6_100598 +Contig_4338_Len944_Cov2_U_OG6_118109 +Contig_3061_Len1290_Cov2_P_OG6_105259 +Contig_786_Len3043_Cov3_E_OG6_101725 +Contig_4267_Len958_Cov2_U_OG6_107629 +Contig_157_Len6644_Cov11_E_OG6_102358 +Contig_24_Len10475_Cov4_E_OG6_106943 +Contig_145_Len6822_Cov9_U_OG6_145244 +Contig_536_Len3701_Cov3_E_OG6_103917 +Contig_421_Len4239_Cov31_P_OG6_102198 +Contig_2102_Len1703_Cov4_E_OG6_102675 +Contig_40_Len9764_Cov77_E_OG6_105576 +Contig_4490_Len912_Cov1_P_OG6_106264 +Contig_6491_Len626_Cov2_P_OG6_100328 +Contig_7231_Len557_Cov1_U_OG6_107629 +Contig_756_Len3094_Cov12165_P_OG6_102770 +Contig_3009_Len1307_Cov13_U_OG6_116443 +Contig_4049_Len1006_Cov3_U_OG6_103879 +Contig_426_Len4213_Cov26_U_OG6_108411 +Contig_5227_Len788_Cov1_P_OG6_100289 +Contig_701_Len3225_Cov2_P_OG6_106492 +Contig_4905_Len840_Cov2_P_OG6_115036 +Contig_28_Len10287_Cov699_E_OG6_100777 +Contig_117_Len7490_Cov12_E_OG6_102035 +Contig_748_Len3109_Cov7889_P_OG6_102770 +Contig_3013_Len1306_Cov6_U_OG6_116443 +Contig_5988_Len682_Cov2_U_OG6_107629 
+Contig_2349_Len1580_Cov1_P_OG6_124813 +Contig_349_Len4667_Cov4_U_OG6_116443 +Contig_841_Len2909_Cov21_E_OG6_107629 +Contig_8720_Len444_Cov1_P_OG6_115892 +Contig_2350_Len1578_Cov8_U_OG6_107629 +Contig_7003_Len576_Cov3_P_OG6_115725 +Contig_8605_Len452_Cov1_U_OG6_107629 +Contig_3057_Len1292_Cov6_U_OG6_102073 +Contig_168_Len6512_Cov4_E_OG6_102446 +Contig_3018_Len1303_Cov9938_P_OG6_102770 +Contig_3148_Len1260_Cov6057_P_OG6_102770 +Contig_2866_Len1358_Cov8209_P_OG6_102770 +Contig_1129_Len2472_Cov5_E_OG6_100425 +Contig_4087_Len998_Cov2_E_OG6_102109 +Contig_9116_Len429_Cov2_E_OG6_100420 +Contig_2878_Len1356_Cov2_E_OG6_102565 +Contig_1436_Len2164_Cov2_U_OG6_121413 +Contig_125_Len7242_Cov67_U_OG6_105015 +Contig_4250_Len962_Cov4_U_OG6_107629 +Contig_9689_Len411_Cov1_P_OG6_110848 +Contig_488_Len3892_Cov12068_P_OG6_102770 +Contig_5963_Len686_Cov1_P_OG6_101423 +Contig_5913_Len692_Cov3_U_OG6_116443 +Contig_1768_Len1907_Cov2_P_OG6_106950 +Contig_1477_Len2126_Cov2_P_OG6_104145 +Contig_7577_Len529_Cov1_E_OG6_116443 +Contig_9982_Len402_Cov2_P_OG6_104843 +Contig_373_Len4515_Cov26_U_OG6_108411 +Contig_4656_Len883_Cov2_E_OG6_102774 +Contig_3145_Len1262_Cov2_P_OG6_110223 +Contig_5222_Len788_Cov2_E_OG6_116443 +Contig_6139_Len665_Cov1_P_OG6_129320 +Contig_987_Len2641_Cov4_E_OG6_103026 +Contig_213_Len5837_Cov11847_P_OG6_102770 +Contig_5568_Len739_Cov1_E_OG6_102109 +Contig_3238_Len1232_Cov5_U_OG6_116443 +Contig_1549_Len2061_Cov3_E_OG6_102774 +Contig_3274_Len1217_Cov2_E_OG6_107629 +Contig_6478_Len627_Cov4_U_OG6_107219 +Contig_1710_Len1945_Cov2_P_OG6_101690 +Contig_172_Len6427_Cov11653_P_OG6_102770 +Contig_2759_Len1398_Cov2_U_OG6_102446 +Contig_4836_Len848_Cov10167_E_OG6_101051 +Contig_665_Len3326_Cov6_E_OG6_103961 +Contig_7896_Len504_Cov1_U_OG6_116443 +Contig_6369_Len640_Cov2_U_OG6_107629 +Contig_218_Len5773_Cov12260_P_OG6_102770 +Contig_250_Len5432_Cov11_U_OG6_102109 +Contig_1503_Len2102_Cov2_P_OG6_104631 +Contig_1781_Len1898_Cov4_E_OG6_103110 +Contig_7718_Len517_Cov4_U_OG6_107629 
+Contig_2524_Len1486_Cov10283_U_OG6_102770 +Contig_207_Len5911_Cov4_P_OG6_104171 +Contig_1989_Len1770_Cov9572_P_OG6_102770 +Contig_6893_Len587_Cov1_P_OG6_103083 +Contig_1403_Len2189_Cov6_E_OG6_100617 +Contig_4699_Len873_Cov2_U_OG6_102109 +Contig_7520_Len533_Cov1_P_OG6_103438 +Contig_115_Len7578_Cov15_U_OG6_107629 +Contig_1732_Len1931_Cov2_E_OG6_101536 +Contig_6482_Len627_Cov1_P_OG6_101673 +Contig_849_Len2894_Cov3_U_OG6_116443 +Contig_3913_Len1037_Cov5_U_OG6_107629 +Contig_9808_Len408_Cov1_P_OG6_113435 +Contig_439_Len4172_Cov3_E_OG6_107629 +Contig_749_Len3109_Cov7_U_OG6_101143 +Contig_376_Len4497_Cov28_E_OG6_116443 +Contig_1579_Len2041_Cov3_E_OG6_103026 +Contig_787_Len3042_Cov8_U_OG6_101143 +Contig_4099_Len996_Cov1_U_OG6_105091 +Contig_8084_Len489_Cov1_P_OG6_101427 +Contig_2431_Len1534_Cov37_E_OG6_101143 +Contig_110_Len7741_Cov24_U_OG6_101172 +Contig_4412_Len930_Cov1_P_OG6_101345 +Contig_7241_Len556_Cov1_E_OG6_107629 +Contig_574_Len3608_Cov16_P_OG6_107278 +Contig_739_Len3127_Cov4_E_OG6_102109 +Contig_858_Len2877_Cov9661_P_OG6_102770 +Contig_3857_Len1055_Cov1_U_OG6_100774 +Contig_7308_Len551_Cov1_P_OG6_100463 +Contig_61_Len9077_Cov26_E_OG6_101172 +Contig_5256_Len784_Cov1_U_OG6_107629 +Contig_6823_Len592_Cov1_E_OG6_100210 +Contig_2634_Len1443_Cov2_U_OG6_102109 +Contig_2511_Len1491_Cov2_P_OG6_118575 +Contig_3656_Len1108_Cov2_P_OG6_106027 +Contig_7423_Len542_Cov1_U_OG6_132867 +Contig_889_Len2819_Cov18_U_OG6_116443 +Contig_5665_Len726_Cov1_E_OG6_100769 +Contig_3821_Len1064_Cov2_P_OG6_100469 +Contig_6635_Len610_Cov1_P_OG6_100578 +Contig_573_Len3614_Cov53_U_OG6_102121 +Contig_7211_Len558_Cov3_U_OG6_101143 +Contig_1293_Len2294_Cov87_E_OG6_106711 +Contig_131_Len7117_Cov11286_P_OG6_102770 +Contig_5544_Len742_Cov1_P_OG6_111848 +Contig_806_Len2987_Cov21_E_OG6_103192 +Contig_219_Len5773_Cov12245_P_OG6_102770 +Contig_136_Len7040_Cov8_U_OG6_100080 diff --git a/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-37.pyc b/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-37.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..1927e559bbbeee04eff8f8c9667eb26c6e42716d GIT binary patch literal 17514 zcmdUWX>eTEb>`c8#X=B-xQl90l*E=OfSYAo(a3_D1WZSUBmy$&j^%c<-viiSqZ_#I zfh0POv18D-oSIBDH8Y;n6jKGHDr-5;OpRUR?9RT-B&lqb%FE^~Kf=oPBmeX#`M&es zeccTL5gjE}2~d6a_1v@FbI*HeXee*s@AuAq+Wq@)F^oTFM*j^V@gjbH)-nv&C>yTn zS`D*oN^F%aiS4p2u~SYVwj1fCOgUq+t<%UZ<;popryBXCf%1T9OdD?6&D=5E%!XAS zL@w**kjr^P8+LgZ`Mf)T`~YhAAUEg^AvYwoBkr)f=Z;a{>yEg45s$i~?moo(+%fkd z#AEJ$_Wz7~Os5 zkWoIiHZ18QNI!DLXdX3DEAx!^w*RQnw5}R)+X=KSpzTQ1)_uHOi0ZvD@0eErBp&tl zdXGr|(WvBOR{41OF>lm+l=aA+KyIIR+DNqxaXi)~r!{Z~HTo^`$~dQd^ zsi0L@@S2{g2411j3Vb%HwcJ*-upgzA=Z!Vyj zH^15_)UH=mwHA0PZam*o1=lMyTQ^&US1wN#YSl&q&F>LSXNqQ+n)OyLy@G^|1R`s? zRAZg~1M)^_{5~W+1L$iIb%C~)YXeOj*Kt#bQxRg8(-BIRGZ8A5v+f|aJrrh2)kdxK z`b+K4<_q|{>;I?NI_PSP>SFNSO738l6g){Ge|LJk*STo}t za)T-SO8EKDA?SemZa8WLv30~)H(a}8;4~fN?R}{07zZVFz&L{Pbu%zJrkmQ}>85Ay zaoBq<0}g!eVFnHYf?Bm!n{Lqx?Se&H1IK2ITSw4cx$Z>}~P0L^si z-1zg27O2600SVnj5d;gDzk2L5fA!keKd|tvV_%y7)ApSY-1`UoC2O|Ph6|B3pI`^B z`RK>2b#vV!Hh@zCGmx@2%mYTWgQEGinIoC`fhyW+1l>Zj9-52WHlRjPGmc-p@?=(S zgfq*m>@Krr*8!y-7|Cb=6dS0$42WD^_-r5ko`>q96I#vXavGeYx!{F%yS@x$Wx8mV z9YSo|C}F+}#v%sri8Y3O&^&CmNAHJpvWFa6W!MrwKMhz%2(~Whb;nq@>c+Z#d;U#x z)7UaD7;lsTQ)fNZNx7**M#tVXw~QMZ^&#Zbhl~ZYV@Ea58!N}6GF@_r?T+bO%0Tb9 z&wDXB+g~B7X76xRnG0y0zWs5Xd*gC6vzzNUH`3~dR=(@@3!M~J^z7%2&l#UH=PhAx zS*#Rm5~9d-(t*`UZJ08f*~x5}3uZjq|GHwVeAg9{!(iZatWFvjGPGfBSnJtN76dWe zB?y9s(A-B5b!o5@O(M)@!~DFl_SBm|u+JIu1f6v!+cCL_4RZ{P>Y(wagkWA)k6=u# zTajSwM64<~6=D`ylwj5v5`tNS#4Hz()A*~%N&4`M^tuf*1i#*ka!q(2(C7eCt0kPN zOMCNGHi#;^rCo~bTP3MZ0CN-xS>+g48DPU3<`FQWef_H?)17a$s)42mt{8<*vr?^Y zBZ}v{i&i`UVFg04t4l>Y%vGzE8u)kMhv_)B=A$u+P>I-q{3jgj-OzcC$zsDEwz8IG zK4qrOyxD&G8vsfI01^&=a!gGmTyx#L?RJcqSgrzPt#vy9&!%MV2(R8S1FJu886>H- z!A`2<07KF}7~-s_V+=8gq({in&6VLRU}-?o)Qn(2vYtM)If(Vhfs90GbHi4SW=wz& z+3EwK&LuR|sB`fhpaeD^;Z5b*TIGTV^=T&rlEcISrs{#WuOA(6pe6`|M zgKB6)4u|HA(7ah5YPOn{wx?Q^n~)9V!6fUiE|s161aE|`%88HOzm7@nf{ffqJLahQ zglUP~IyjFM&f@gSVMtBn63IJZe< 
zk+VTG>jD_j?Xx0bpGqVwZ@Swg?55Qb$;zA7%J)#BQkDh|)XfZ7UOEzj$me?VN1&-g z(psG?7@D>67W$<+kl>=T4|HWOg&==|q*F*l>WP-UB56TeBJQc^LB0FDmcoumVbOH4 zAgP-NH}2TNIg=C7bRbOpUqaS4uuP5Y=QK9Vt~M{Oyr;_ziC$Cbh?y8rH&Eh>X~!JKf7U!^wjX&1;6^fz)CTnejJ9vj309pz zy~xR<|6JCmy39jZHWx1N{uB}#6OQk;*j)~uXCepRuUL^SLkR?f9@C1I)!nD&W^&iq zO&8qguIfIB?1*YT3a6-H4}xlUx|2j4JS`ktO`tqXSC^N)rYnqG(4vo#Q$;K9r3m+E zA@JN+{Z-FxDRSW+ex3%tRGKxxr}Dt9xGlPAAaqYfFex38;35}B@oJdajAJ_1C=>xp z^K)=^2kPKK#A%f6M_qfk|Kt*w5f)Uyg_VC13C)|5>x+5&zje%57krbtAZ`hth;IlE zZU{!w(ssetwT8G3!$ybqcwDIhv*Yj%fr|`cFwGR$twkmt*toc)Xm;#gYjCNF4RpGd zo7GpIkD~4>ezESD>oQqxfaD;DPtb_!QixO+aAi!!g+hIlG-Qpb6&OK1(0-=wR~yUM ztM8=5sHZXf=ke2IaY6&P28MXKH2ZoFM@-b61ms+4!mN0{LbxY-SBi@kB_8w^f`EiF zZ^gPxOep)%FT#=d(r-hn2yG|>U{YPhE1}~5^nlAv^p<3~4=P%=Q&ni*mIX#ZvXJlS zE=0*D$Pe50>G9LCe$+!y4{?*C9;Iw5|QFfGHTNMf_XK;6_H|JZDsNo7#rs`Ia z214F3B!qIZ`fT3h0w^L4(8nmKWSv&+iH-93D>prb`|GXaG>OiyHen^D zag#RPX6x2?;X|u_P^cD`l~=2-vW{ltrRoh&tf4R;dxffu)TjqR!ziaM**BYyyUsys1u{bwdYN^_&x4ntlb=V|j0$69&Gs~)V z6Lx4<%-66_(`xOVlJRa`uh*^@>hoA^PyoimXnm{RXuuvlUtMhkoS#=e-&u@ zc;6uLs%Gm;0E%j?ohtM!PzF+BMV3Lp}~|SU-h;%opk)OrrQt_vE0ny4ky8zl6b4tV41cyN+vk zj&Bf%^Km>7$Ag<>uI6TH3q9~UG8ny%xI?qJ)X}=3MD>dnw0R~D@USJc!39x=Qy zAJA^!b59k{BCKw>)@lOr0}wu&ZerMo;lXN<$CYmcnkW9P!%!UB`(ySn-rp%<|J1^W zpMs-YX@r#v^1kTWJH)uMNcnCtF`NB#oQd-~&c=Bi=ik#Y1s> z9pn8{*0Vjvd#03+#_e^yFCKrlcr0$OtBj`zp?cZ(0i^}EFf0$qL}p6@~9-LOEH|7&7{1zq)U zjAc50IL_<%_+94wy*pm8sKE_lXKOAGU&g=NGyD5x~%Z003oQPZM`0=|oTBLUv zQ;(rzJwh~Q24kLxccNq6A{rBRAXtYYPO?4Ld)M(3vd$dF+}={>bxR#bJ@T02NDPbJ zV!f&-WsU*Nv0F=B-rr*oa}30D>=x@DCuNQy%u$SCqhsA7g3T~*F$=i-b!>#$P6lDV zlS4Sz89+GP8A6!uq;%*$bvL`dr?aPnTM)F8r)NTQ5?1mS1UfCZp?SG%U9Obv;8rU% zQF$RWOQHE%XkHA>>CpUOXkIF(g6qojLi597F)mv#S3>jU(ER4moC?iXLvyxl&7x>7 zG_Qo_M?>=?q4|`04@NI%D@%2snnH0hw5LmR3`z_nJ004kX_S?wQC6CkEGj0aB|vs^ z619_)?7`GDQ*-PwCp{+7t~7~u(@afEinY_Mo#rUhGAR3&*mrUcsmVD>agI6kotr~y zj;RuBORO!iti-ZOjy%bc=U6)@wcJOE`zUcAr4nmdHpj9#md&wjl53gdS|(XL$=XTQ zPD&3hZH`NulMQnCX%0Wl$){y$oP3V^nBxkrUcHLP>H6)WqrL>-s_$d)!wh}|0q%UC 
zV*1+{B&E`_Y`s>&O}xJl?PZkz$o8M?>x3$4`~58C*y;xuusNze&GZI?{sGI@#Y!>T zReBZm9Cd*Km0oobK{zN%FFZ%Q25t;gOQWmzh`Ne&d2gZ$SDLM7p&h#AJ>8W?>`stoU`qo9}X;dLFvMb+>gEt2>D+_V5APhB=unb`@fO44h>%id%_WO zB+arBDZ4MHOLvQ-zE&Crf$l#{X#2FL+_SqHZ{_^sMLvU{ji3MR2*g4LCFo8zfLl*s z@+mnAU1vXCeVmQR>jla(oD-hwz{dZ^*>z{_Aef~*mC!!srlaQ#*u>69j%Xd5W$QL9 zVdr59J5NiPn~CgfZWbn^Pouxh3msoq=`km|^ zw-e!dkMD=PUVAIs3_CA_Gi!s`V;WRT)#VaiuzKuaiGe9H_!2!LI}n$pzMH}SMgX%s z?IO{mVoH65>1Pn|m7FeIVWzrtjjv;&8QV(DsFB=&GV#2ZxF)gn*iVT~)B*ygVfDck z$ZGG|w#u1eM!mp^KEU8b29pfR4E_&;A7wDqA8GzhNE_tviv?s+Xq21mdpDx9ghR8kb!pvmBhxZ!Z31M`BvF9dk>8({* zUe=cHvqk_`g1+Q{;$0BXa0|3`O?lC>W+EfFB_=sVV=HWpaxz_zMBrbj&9#DR<6{~n z468jHEqFYxO754Om7G=xXB-L6>YAID5D^pJ_9J%%SB{z@$FHR2QFn zpr)r4xWR$%%qR4WfLXpfV;{U|EEISV?fxlv5o)-32~87v!gzLqq527ol*~aSOCRFRdI}MUXp0E3{ju=fbI*Xrw z3PJC?2%Jyk9fS@jkW-LR$RkW15N%QOBSlW$708p`@g;{ezM^-SHqvJM(S+vCRnAZC zh@pa)&UW%X!TBupJfd=XN=-ox>~fmu9XJgKT@2dfghX@0J_>2n9_pJC4-Pv@p&v(k z^^**KiUH3?{WOCv^OcnFt{+3iRZhcZSsU`m!G9{7Qmm=rm;4dzFXAUo%6;z&!!$+T zfvYA>b>fRHu8r`;&bfKyGq@-YBF@sJT-+08=<|*Dm@0hhqnC!*VoSStV)2Ke_!=G> zXrZT*l)O54RT}i0ZXG6nJgLBx3jgzki3M0+Q5-)uEWjY!EYOV^o$3v|F3{s!Z1?i4 z;6lVf{;@BtzJ-&0vaWb)M}P20nUH}_;??z=3e|-wCe&T|X0+Ofe7`ZmWshl?g=g3B z6u-7aNeT-q-bY>+XZn+uXG^cc^vrdsX~48b;PQ_#al3?{h<;e#A;UUG1&(x^cE{Q> zKV~6^N7G6wNH1pa%i@>AFYlUnSw0}joHYZt+T22oZ#eKn--Z#z!N`hiHv?Cc4^duV zGsZ?e^-;_&2%vUArZBTmo5EX=52rTA_Zst&X#Y9(>7tm(DZGclnjcguodlC%SRS?7 zBQXYy>jr&HA^`xEEOEp(7O`=0%7!!iA_*H(dJCUU5DfUG(Ms2V*tb}o5ir;@5vgMV z2JUYZgprCSdlWtGXHkZ$n+qd2{VOZaPO1OPqTz1QOzYVx(CjauxST;T9vJ=Yn;nqH z9ywRIh^WZYKvIFjWU;!9IgzTxuh;1 z=c2j@3(OguG~pRNHOpzHmSZcmB)F55#$HTOKCE9Q_HTMK};A z1tz89;qV8nlua3C8rEIP9AzkBw&Q3418f$UBILH>CABsK>}d~wE(y$j(R zz$*yCMH?Q+HXN}A;ZWsl0uL6`0+#6HvGuI_d5jWy(`9@QD+-k2aYx84Tv0acEY!|V zK?ulZo@9fQ47QBjZRhWbvnfDv&*I7K3Je6q|rl1mA? zV$o?iuh1cnkmD#1@};^$cvDvJ#L9Uok^*RkJDov}{bjlTgUb~ahv46<7n{`MGaO+; zyU3GYBqV-?!EZ45Ee5~CpbMR@Joy?bRym)@6TTy*vuV6kXB2Dt0K*l@Nnb-EFczW8 zckl@lK7O&(VE_vByk+9SGOlqge&8gv&quXUNlyzqY4P#6g_PjKFd7G)nW=Vq;#Gd! 
z)E=6c#iMoAoxSdPfzR&`T7Kv(SA*;D)Aijt-qn_FAfmHJYc-g`qkhkJg|`i0fMp&C zUXJhT9UHhX;^ho*9y3+Q9a{+amx+JGasR6bdhh*23fJDk%^sftL?0o1wqtEtg7@vn zMX{5jdSv25f=wrm@nCBjx36|mi|M%ShvK%*W(F=SpQRynGb(5l*DHav76)^ED0w;SGmo6#|H0(DZG=WwE!O(CCA9kk-3Qee-sJvrGZa8 zrPd-%8Ss}}v?Bi*0U&7XYiK7TBOHJvmd}Uq(5tUlk%zq4%OZ%2nnFjK?<0^oBkl$T zdS6`j=BFs+dvveFi3Wc6tcq+0TwZZNu`k|Bs|s!dRKg;p7JL@WCSfUFx8(&j;+k~G zz-|8zSt>TXl<%!vc&R)9lYq8db=|boZ?he#0VKudu}^;QYQLA;IEXp}f`QJp^2Q2$ z0V}Zzz#jF-s7n+AUU~TwO8GMSQoRL*{2@V13c$lXzQF)x;2iOpSPDN}c)#5RPB2-Wm`5n{DgX)9vpDixyMN zr3<;qrEB$tRVa{=Vv(CJJriqmNS?v(EPm(sdqzG%$X)FD05A8-H9z_01=1bds=p|f zDN@+=DeP(4Qy90+t1+OXnnlpwKVHIzZt`JZ?= z*Zj&6pIZR7+d20zKoWQnTwYlXzCwaLj-lWq39t6NO>f9ku>Xyj@3Z>z>MzkMVd+#= z$en~VSp)P11D7wRBhLm9=mJv^TS+R;$y;}~w40K8^l`flzK=XW1Ns z2?k$B5N05n8`YX8EMNT*%O7VTHpo9?>JJzY7ol!-nS0g`TFYSTJ}?(4U;PD}Q6z20 zp+VaAfp}AtGQUm?I*eAtpo7o|N5RIS2;h^eF>@67blOsXjoLoBnJ{(2jF{t+Me>C?c7HF31*4-&5)4l z48E#N7*3Wf1eB7GfSML4sy}5HftFAo1-^}M`(+KV>l4kpAj@9 z6oaH?s=q-hAs~*JfT4km@PC4VGVOjayaa-PRuqBZ_UVNG(H#SO@Hic3 z3nRHAZ-C6t_9C+YDfdqSDK}2_I04Xp;l5zT^=7LuS*yFEUH7m;SaH9O4fMcFGzi>49yMosm1NYl@B3FZelk+QCx&5K##xqSQd z@>*~mW`^i%?81lFW&+obs0f+4>TT4~SOQlyd`;+j`SwU2nvA%vVT+#sT>jk?GDU-2 zy4&YJ9KDaj_ptyqdU-V{kTs7NF44Uc-l2FYJw*#olb&~xP32KBMNUD1`g4pk*S2fR zg}2RuAaL7Wx>+bb5xutjE+JtlFnWIt1PkxF)Ik54I|?K?Oy6@45+`0GA3+aC|K}U&LA?qk*kDGRV~~UK17-0imVD9 z?>1Z(g|!H=i1L`DtP++{#&??F928@a{OWHpPI)LPB4-KH*cLt+?BP5@Ym_))_W8)W z_yw-t|2P6CANlHK?LoNl58A`+kxv>Ih94ez5}v{%*+Y0|N@p`J^snE(I) literal 0 HcmV?d00001 diff --git a/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-38.pyc b/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80437ee92904062e743f65321f449d7b807dd3c2 GIT binary patch literal 17804 zcmdU1X>eQDb$%Ow2N0wvO4e=}mStIxWm1dnB(CFxh9pLbOxq%5%cva$!h3{3fdF~m zqhz6AlL?i?=`@Lxrpt8H8FHrW#GOp1Ka#dT+D@lwrqeA=_w;p3(k)7xZs``yB<}Z} z_Z}9Cl%2_R+Jf`$zMgyTIp>~xF3$}PW;FcDKT>qQ{@t4PSB&)j1`&BHKF>BZ&C!aQ 
zqdP`jFY4kOMMHeEXo_zY6Y$M?axPU&=`3s2?YVR@E#X8xGdEBi(6tH8Njj-(nv+^K zidn>LCyiLz9b7hxLx^Xb0mKK8y9u$ZGlTV+-6(x{WskdiRd)_=*QB1ga_6dscF?-> zz)r2Ww>2c;-3Z_Htk&40BUkzv?JfVD)-cX%QQ0w+&7tgYSk}3>m<#jWt?pho2T0uQ zZg%gI`Xgb=^G0!B@g8@?y_@-n?MG~jyU)GLIpQ4skcl3*xcd=17RBy}>Th%JLC8S9 zzH+Z~+}*ofa~|BTxp%>TXuGybm@wyI_wHMB9Aiq$zoGTM@!DLjIg|4**1VioQMGwL zhmhaQRo#ZG%D$VcH+_#qDov-^$Sr#1s+;rtvS0K3T1ASMYfi4#$kZIS;n!xa)*4k5 zb7vOoxyr?|Dp!10MTKXYD(ASlM)Ptr_vG1Axk|ZSNAVj()ycdbB&OVj(@!E`A^^{r zC{&o&`vU@psQg9*Tn#9!5q*KWhGPO%EXQ&Z@Dm|&7Ly@b7E>WQ7HubssSgIJLb+Zk zoO`_W{!9+Pv#!6W8ko;TH+OPo#;y3Z%WiIRaqfbvIQ;^r?{Q*I?qKtR=c>yHl^agZ zeQ_S8T_-Q?%rxuu<`u$#Ex8MefD|h>RjpcUV7^y#Gpae4dlvDFe(va--gIPCDpvHK zS#pvIdYC3-Vk7h&Jfar%%gSeh)Q#A{;|V|dt8g8W@xPg zF$fxq^*VqwQ8+&OXuXMj;ys2yTuBiS)w4gp_kDl=^cUY!{hqBqJ@KX1wYS`O6~Dfb zn<&TO$jJAz5=Xx0d1FamGKeFfqrex0jAebh7S1ED-_+CGgr2YRrrL~ZfnE#r*)`Kp zTaYt~FB*SR>+74S#{Q-6VFaXadkGNhfmhfjvtOfiL%ZRkRNe}V#(a*b}D3DLYwiPaI2V zs2$aL1G;8;V6AgvoS`D zM)pRko%D@%Vp*4d^mb}luj)}hpIG=9c0|^<+D1DG)EHbgmW?I5ZDUUi_3sISgpec- zc-^ho4m%P0v#ftmYaMzC*!2Nzh9EPp*=?PJSk||KcJ0t!5*PH$=;4`(B_rgS?T~u) ztq75dyG4*{D*}R4St6AKxP^%Iimpgl1QA{_1cBA|I3Yw2ZbQM}Hz3g6e{YEq0z0nK zAywTiI8$bUFr%B=-I%wGm77mMvsF4{$JCeg-JnccqA{x+3cLb(JX3F$eRa3Q zidLgkF0UE!qsZ!vSaA)69H6f0Ezaf5AYCq(Dxm7V7bGL!n2A~;4NyXZvb}S8kZm$q zGlvY@F!V!uQqSnE`@ao%^v#{ShM%m{ECP;>d27*5IS-sQmP{Yin>@QMRC`(XjsCcy zaX+=P?L^xGk|cYO#9B&5NTL&0cayg33)!{~OifJ6^7oCW4{vs0JhB$UA>Ldvl@${u z;0}pv2y?DN7$P?mxCzpnMoj1kiMFk^PQC`1K*>X#DP6c)I^jaLS`T?-EwO%yn(xke zfmwGO>HvDmLkgcMmz=U+4oooTK))2|my3gqW~0<{RkL&%?4g+LM1ciGYo>!ZouLOK zw~k|+GZ}%SB`tkeKcE|WR+fH9Zyo)%XVt~VKH!s9bT;iiL~7(hyFxDHfD3{ASs@Rq zYCj;nl_LHpiefwR{k~S(XFeF zp>p5QW4w_)%*`Wsvu8oG#ZK(xOE{K<@rbeA9}h96J{&lufpR&pD^6p&e4*~H z+16xk9h@3|9i#3@12^8x?sx#qCluw0z`EpK^#pQ(Iah0_0p_I@C1!Pq&QX@iIK;#f z#^ZG%E2t((_n`~~!bXMo6%O`c))ne8Sn1F~AQhYQxUiaO585Ndq+xxlu~pw=jTpm* ztsD4u=zI08dh3pLSECQ;WV@tm!giC`j$O99hUH%Duv`m^k-$QXF6;pFM6APr&ms=t zFJi#R0YhaOqzTJKiZI|EaV%oMQYT`J$csqVeAr_Z 
z_)*^^Q}r!~o+%K$WXK5-Pxr=mLxKm(HQF{vnz8VGexeN~E|UB}KMUrT;wQLy3V{$z zF$)%ki_IlmpTZtQybs3A*b-(eaxaD?c{8BMZByuH-)h8LKuGyUELY{we~2U-5zRVE zbof^5c#H;9i;a^D4|VAw5$jZCrkijZ?%E+LYljt!vY_Tfq}vZ{u6JO!O#R#A;EKB- z;@%Kc*YfUWZ&&JG#PXKnMpFmj6tnZ5yXZ7Srd>2=Dl_58-@+2I?@|$XE+NGeosT|* zKO1|ib=PZvG-Paq2Lxph*ITCAL8kpE^X(Bj^$>?B^r1tQg;l->>Ahra-$sMH%`$Td z1pE4hC=y?YeIUbIQmC`yc^(It z`FXeD2$2>p-$$g$mW!Lou3f4LT<0lo(RG@NvR@CaP69(pjf!AUao|c+7S+O`b;)%y z++`HRJaPz4w^NpG8N=ZAMog>WTfn3p@RRt4Uo~E}ObCHP{Y%ydEn*X8Z6Vd4Kp>_; zos$aqWJEe&qSS{Y!Mmd4fQ93L)+uK?AltDlxCF&Un`d{NoP51)@xXvn3w#jA1c<6Z zBJGY=2f$m}7mq-tKdJ`aW0al}LTcXtBi>ks1V2qsyc_V>qtt$^m8Eli#3A$lEo zrVrflD7UaJE~MVXw>J*08F6v}H9oVH(qde!9NG{v0oAFPB8QiIx?WHBMU^F-Hp%7!m)MBveUM0zscS!YN!el`+i)Bx8n1N z;;&D83W*>cq$b?PIlopfzYpS==qvX2-oEJh zxpHn^xs~!F^QcyyD_?R&GYWOGn=4C8^_uV3-CU*amZ5VVbIV>0I#(&&fVO!wBc-OP zqOR1-o(E-frM~ETxj9r24RoWa=F0V2%N?s+bSsyn188d11M{kR8G30))Y;HiQ;Y2$ zmiDe(tW_@NYBLzDp9AhgiG8J3uR}jQQ(mn5?4KIkeTxk!f))1mzP?7HQBBq602Ecf zdN|iJKxs&cMp+sGtyj%?Rf9fV`VMvJ!sc^6&jO-Ijt1)iXfZfBip6=H{M=j(YIqD2 z^WH#TOpRlKOZy%;B*PE@p%TSkvL^;9)v>RO?h*=5Q47iW>mm-{Y2Hh~&qV$}I6qxfDuv4p=_u@$+J`AXV#)et2|!E)kgyt0JurO zNz0895G$;($c=<;%fu6}5 z#q_xtMMDijQjzEd2{i4hAOXB<_<=RstTh6Q^jQQDX#5jZTxz&M#;4N0=SV6PMo8XJ zrP%=T``G*}x`JlCLvW}pFu=j&uNz|gTS;peLInPdT@Wx@+akg-+TT7}%`*swM{d5$M{Y(^({edVR``M`fjr_r=eC*@4 zQ^vE_$L*)2k3{8Te@oQ`}-NHRDSNrw)F&4yrj0LeDV?pf4SP=U$ z7R3HWSP-YjSP=Vbu^^7e*svv<&+GMLEQtLW3u1pGEQr&$NBzBCeiYaEn52GK`8f=%qlB|_K?@w8LL5wUH; zwc9DUnRXg(wmkrMs67Z5I=k3)4qdmGHnlgkaaw}Ba{pwYk3((V1gnRLiyP0gMdNHK zF#RjdKu6|@KraOP(}8|6&?f@@ZGnEenD8$u*A4V1L~&d+&XfZEOrXCb&`$;WQ-MBJ zG^UU=9q7*n`g4K)OrRf9kE8XXU7D+Tlp*rtfjLo_rc0D?gU$zkVaXzz`Enp2#rrmh z?({T5(+m}uTVQU1X$7W@v*mHNJk8u`$>lr>oJWE4C={5>v}vYIGi{n_;~dL4$1={` zapsOQcU)?4Xww|pv`moAPq6t3c0M6PW9QSH$23Q9{``5|ebrusF6|cqT=i)>zd+}g z;NVR75r$vxOO=vErCN?ux_eX_h%lhvKlJrYXpZI1C#Tw~q4!1=jG0KCEUCE^h7!$)!f~VaSb6aZ@)b zOxWBbJ(t3yExk!`#R2ImDlr&F;=I8~=#FwX#nCu_D3-;e0>hnnTw=H@qeo>%dQ#$2 zn>rGERA_TIAuf~Uah<256S&*yIsA%fI#6}}!ih;aJ$(ug@;=2~o3Npcg6#Rf=YBQ0 
z`+u?baPD7C?lK4ie&4c_>xOmvn|3A2l~Qek-pGm+L8$^;W20lkAdZ+LJg#2L!I`$sBQ9iOYNZ?ao1=N zIbp#Iw4l`{3F-*?n0O#njN-t21wn}b+zt^ zAvIT?FW}a$M_t&_yM+qy&#-N(s6wjqNrrwI4m4|bGW-C;3H4cqc`Ham0ou0(sq)+f z-UtSIq(lBy7Lgs7ih4cGF|lpUJR}NZ11e)t7~{;>+O%ejllhc7!#19vGfC%3Iv45u z3Y}l4BWKR8BLH{`89#_m?m3{^%Gg`&cWc`e(<}J;euVy8@p<2&SzvOa;6lyP8W%-P zY!!I#L?KeXgw}f}CdYFgjQemq3Oy9`Uzee@Xf8q-b#;DS1r3Q47rekBa&|uT+Q3^! zm#aNvGQ1)%L|3V(-c$nmewYNQYGu0Vm+Q}S5jbbfXbpu!9*wg4j@Im#Oon}Xqj1=- zqpujUhp<1ehj4K6E$$&049dP)rCC@~#+8~3*1sMREGHHSmcsytSRP|{&vFvCduAdF z%eogh0Zvow-u8B;aP&2hnp&h+58cbU;LxVlpj26uBz(eG6{#JY3HeEQ|lrg@d%*^uTxyGtjl746E)Q5 z===^Hu8{gXoi0t4kj(8jka33yg+S!xaavk)Sq){e5+;3QaSTqsx^ll+TSE&9_mhR>y4$t=;9WLatF9 z@Gmeo)`f;Ucd(C<z7|b6n~iU|Jz?;~&Gs?E*gG?YM0KdunSXmxaVg;^XH)oWd}(|cN59VW0TI`XN!Sjjs}*4%9Pjz}0Hp+HBD z$-1hF-USiVI!HyPd0-jr^M=iF1KZ`vFl*I(A2grYlbOvOYDt2>)mB{p!fjHM)ogTYF=lb4BN zVV+>aEBYi>oG=ZqTe<6lBpx2nc;d}W{sL3dcLhAc}l%lrBg;Z82Xl`Cc>DNOj%Is;C#whZ})Ib;h(G%mvCc)*=}JWLX@v;mj- zV>TYp`7)iqq4Rfix)ADencqdmBKwQDOfr+Slen)>DdyY)9F9uX`vL;KHVe7EjfY)$ z3S_8V02X%ls*Wq$D917Q>`QVV4Rgbko)TuK#5J^6Ps-drlL>Lr*Uay7K*nW zkx}SV3kUr4N~Uds3PauBPEtF7=T?2o$jNcrZsCNe6%e~3c?2-aBcMKO;OJ#UwnW4f zaFGFUWqCS<3vR&-^;u*@W`&{<3_&fD%Y(@JNkTW2$_yMWS~(h_oONeTk?;2iWgV*` zyfa`{s4bw>mmg3ZDz1ym3YHUx*`HvwAoQ0R<-SrU>Cl2LB%ZWO!3qDLnJ@#VnDE?% z6OR`MpazK5b6tZq^|vf1+u@%W5-iE~j`b%vksZj>z#(WyELX{}v)QU|Gxh*+)W0IH zPeS1FmY-~tPoh5MU2KpqP*UIqfaL;SpI~EP8S!Q=fe(kmWt}BKEt^fpy&uR~8Pitqj^IGHc8_XvY&F81qozKhvm*(>_o@jJuBqV2G z^3=_tK2kGwQy&xERVzCRPB?nDd}Y-4F5fh++~4{!P-juPoIa5rpSw`2E<&0NrHk}L z;eklBL+~iRWB88qdqAFCq)+y|K1@G(!Rx&50ec6L>Q9Q8i6nOIDtk)yBt~U3>JFfx zD#K}Q8!g~=Um^}rFE3jLMcH1^q{yjRMqtiXJ{57M0M(5Az z`~{uAr1M2O4jqbS`E(JCeBlI+2kIXX4=g?~4D>7A{Rg{WkZI~e@Vo@DT}!Zs021E~ z;etn@ddS3hX#`HWV&MWY zwThRY*!*XfEI+Z51V2fWpSY=uGTZQI9R%d#ZEGd%;Pufgx7Z+_Ne+Z1 z!WhZM!pFPl-0cBeU}$p9B~X4);bFd}o~QEyop;hH(Rmjgo_y3Zbe^Sip3ZZ0igc#w zuz^-`R48|AKrk$fsDEd+5aG`-gmu$y>W>L3Ld>l&?E)Pk-2Z`4kOFJ2mn*JNef2NQ ze1MKHn*Ufh*)CAJ1->rUcL 
zDG87Oxn+~4VC!T;;3gHJss59WKuxw|f>By<;&{ZZ7y@Yxd^X8uW9w)RyY0&a5qFEm zy{0RO@BKT8t#$xF{kbp%0)PzeRscMPodNkM0zk~*Xe?~Pz8X1g(*er?Sgf|Sigp}f zKEUb+*MKH{3uw}#l#jb$*(O97G`ZGj=Ef^EN95}siiq2WHINb!gW$?1f)rcxFVuUW zwc$}CD{zY{e1$+0-H-rnSXT+8(B5_qDhU|VGOuN4MAh&}rgdcgs(%rxhww#u?uo0D zzT<_P2$AXXi^!t_1*UpNSwCiB03J7sH6WrS{@Fkl zj~=q-P;2M^sv1XmrF@ImFb9_FzgQP8#SsNM?khRF!de+2ejdap$yU( K3;$SRm;T=n$^=#b literal 0 HcmV?d00001 diff --git a/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-39.pyc b/PTL1/Transcriptomes/Scripts/__pycache__/CUB.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc183567022aee45525ced90d5d1dd04ac25f26a GIT binary patch literal 17634 zcmdUXdvILmb>DsM?%l-#1VM;zQ7cNK2|WNlB-xTxS20X%Uh{HN_?l9{ys)Sb4|B$ILaOxxSmb&@tMle9C9 z`-)my{r%2&?>+#Ck|&*MyO?|K_kNu3eCM3+eCMJvJe)J|``Y-r_q`uAjNfCV|CdJO zB|O0&n}*>TCBrj4t6`Q*d0Qn*-ge2Bw^K^tZ8y@(nNr4NS*PJHXG>WLry9BCq0*3P z%o<+W%iK1+%%)Y!Bj$Qp#IpYIrd=9AJm(D|K7`zTh~>Ru#D*nz)En{k-8M@5y-{yJ z-ecaFcL47L-njPw-s9du?-1S(c!#|scpvl+c}H(p@0#jA`Gff{JXd-#Y^I?*1H@G*oR zdfjLqH<2qXp6oCGrqQ&{7;)K0P_}@wqq?m3XsMv{{c-<|=54fw=xb{}F^NMO4yU?YH+HCwDQum5Ku@)Dll z69B#eVl_ypAX3Y-K@^VXcqzP7nhZ;6O@O70CcKjCLfWw|_EM!0)mj zuBj&Gv*Z^pE-w1ju)gjW=GK<4`HIsoa{2)$<`qu2t_8kYN2t>D3jW(GDD8U_(#~S5 z(P-Ty4%m{vx&}cJd7J)~7Blyn43mE7elvbLY=IbN+wtNo}Jnf1uL zzGDjN0CFbrsBwUSeo1ibTlyVEK>BqzfT=z_>Gqg?-)~($4Fn__!#->tHQQsm(L6BxEJ|ir8c*;ZfTpnxp|IP=hE+E< z?03v9W81i3TrB~e&PJ+}@=`~Pj=g1W8|$X}G~($aM$PQlI_Cvr^|($;QjW0P@g$Zq zP&=;kadge{pj7uZ+mxO_)#)UBbx!x{WjoHgp?+cY>zx$F^6XcQuNYr37cJp?F2;z_ z$bQIl(xKH!ZJN@L*~x60H8bw#7gxWIU62o*j@3znB!)MwO>4vLxY!3HgZqFeAtt%@ zL2vtYbSJ`rHqEaZ?Z@8%Rei-+B+9JYZpY*xHqCKxufxVW0>RO&KB<}7u(Z_dX#O>@ zA|xpCE+HtAn-El<1myv5nyBtV1wfV%!d*)UR~=3gGTSEl3Jd6rUTSYM?iwdIoS^480y55Vjx)rxH_c<c#f7fHRI<*Vp7nw zn&9G!qU5fYH|h++>MVgwsyDOT3tqjd#pYFJ%c@L#$oPA8)Ol1I7;-`>vSWK9a>INF zdzPBR5$xL=mU47F7GD(EGPH!xaKM!Hl}hARz2-vYTEo|4@3I_k~YWz};m_XNQ?mdbggy)s4;^l&M}B}u1I25MikN{Z6M z6ox3AVz}DncOWA!LcpaoYEuQWpzZ01M3lC`gm8E@@%_9m~L4e5u!YCt^VcErr-=6sFWEnxU`mbh-drb 
z$DpS}tXdrx?8{pH^I@t3K`r|F(4d&*CgLMhIfH;kQzB+XsA4;bWTzqr_3f*Pz;#66 ziav`WN!}v3Z^ss1Ij|ZD@eAALR@zIfikQ+wLORf)?v4}&*su0GS_ zek7`y>S8b9J`%LcLUs-*QBlFQMG(p;K4?(O$)I*k`G=Fx>Jc`?ULbR==1$q%O1e>} z5SnllH}aZJY= zg%r0Ej*Pbh+BuAO8qeta*87ePwQgi^sRl4b`k~e=4EYxkNH|UR%mFzVGst(S$3eOO zLaf+3Q0!wJ4`&nG19MKayA2pAIy`yf3>2CjhpIo$A9#ZWrocKavf|LjnIlD`Ved5Z zy0Lmx8`6?eo|~Lh-bCIRJfiny2U#X3(4{Z#LD4-5TK52-MR%MARX-i8nOXx;#3$5{ zOg*SHR$#n(8gY&P9_>Gaw68K~7)sPWlH!NE5i9Fs74OI?lQr#{kvyUNDEfGyM|1Nn$M z2sRIGD0q7(<~ZY@P6nc63S@L``^@B-SgGmTLx)&Wbec3yGfOs%xUglrlK$ z)SHVf2~=_9DN)-hO95FdQPqXDy2;wSgC+wmn_-S&S6JpjaMWxrgGa^>=J%Uf$OoG+JQ*=xi(nQ|Gdy<8SEk1C*f^&-JVf>#M-Ks_MKLJB>G zC&&Z1rsKF-*L9zBPl(B=io}5*SK%c*0SWq}**ziUvr%T&Z(a%OjmnpyVu_{kadnd;U?_FlQ}81K;v??Q3(Q=iK~q@KPW7tg4m9mEwx-})Z6}4 zb;+;ZkPcvDRZp#`);cW8o*1HGX{PRqpCPt!c7sJsIP(g-S50)!Yrdn?hW0C8xt>FH^ z52D<%AGExBvyO9WZuyo1QNd_liLxuW90(N_G&0Xd=F^e+Y-F816Ith;i>&i!)D?8t z{zUHeifY1&{CwfkUSz>53(`WcwzASvkg$c*xBPHoGS_~lxTX}YC17<|YfUx*3w~mp zzcTmA=PzEKpMG^Gd9*9j4P06@VY#ce- za>$!S-GHq=Rl{|JALT-rJ%hdjrf?Wxc_YHesPSQ0Tl5juFOUiH`9Fg#DxKGkG9F?$!`z5>^j!F7%ctFxM9GCEJ_<*Dz z?1P6Syc-^t^nQ5c8>V;k&h*CKtDslsnAEyI{J*sp$7O8yhd=CEJSc76AMRcSo$`NT zEpR2!vA)R_IUys_5Vy|y?%6BcalQ!^8Kuk*E#-9EdRXdfc(N~y6z`EcDYTgCwy5(T zl~RD-V|Q%ir}u1YN4s%bnQmLBq!kU(ZrqmJZR=x_-w%5;JuTzPcH8PNrNcYR=DO`p zNV~fs$MCqcJJfA=cPX9HE1U1O`-HT+8?x+^((Z7#-KV6ihP}CuP$#e90MoGn-A)ED z*U19rJ41jYonb)OjS}ch-f=hfb@p|j(?h;a&B5MlPDjpS3&M#eXT~op9{l5I$+}#Q z?C@r*n?0wNd)bkBVK7=8jDBt~dT}s1+uu`UetIzXl_(W1Dc|q*{8=%Sb_e!qd9eJe zgRT9@VD8Ld^wMDA`Tl6hn%~j%LVtE-zTQ8O$b56K>CX=qeq2#ZC8KuB%k_YAzb|z< zvS*761VsXg&PH}|7HP#rO8qG`%1p_OXDv3kwJ>FjQo2k-0^t6`3~8mZ#bB0&^E6m-8rc z9!1WhSY$5K7MQlcv<0S3b1c&w%QSPRnLEwgX{o`XEpTWHGC?*!%jRd<`K%0$oiA`6 z3mm~4Z@huyX9I}F1yud*7~AS?V*c;+(62f>6MmcteFUxk++c#EzQu%JCHPwee;WW@ ztuoxVPjtQWOqG3q%CYQ>vQEE1>tsKX94whS*?FmD@<4?h+F>_!|xLO zEJ3#g4DE|d=nXBUGyeutIe7Iq3BCn@vWnsUvVVaIf03ZuWL&mnUF_c#ZcjB=msx(E zfU3D#0EqIUn!{7YZ{Tc0Tw+*9lDy&g01)TyOQK2sNOCTS3ygN-Nr}q7FQVS+cfHlE-o 
z0mQ~f-x@cBlTm2$_F6;FIY`GNXH$FH;NXhe1>84WJ-6Ys4?|GOg$?a{URqzWz@~RD zw&~eS+puBDI|obNIhcJj+DhoTFn#?vYTG=N(!wVP(;Zem4w=W(0~}_*75N=hp)FD+6Nk%Ts{Z*YH_87MFDmJ>d>phZSX}Pje#NAn+S+8p)6Skm!jqQpt zi=$D0jey(_b}$~7#i9l4)^9M(yE$IX)0ZsDRF<#tUM@0Y8}F~Ph|pyw?)4RnM;ol{ z$Hky)!K5n&U7Xz7`*w_RZX%;T%Qof+UL&|fP$&5N1m7kY9t1M^&moNrNNyfrjLNy= z?iY=Nis@B613zN^B|Lvx7rgXr(T8EAJuRk{!~^fK*r%m{lSQiydn@UjMLkw$0nOb$B z6;>K=aS>#LSnUx#5T7rr%4=y3nQIZyV?HXX0h zGOo;Abe{|9aWt6R$&tbn%I;fE(skcVWMN6w*Dx|o)hD{cKl>pBO`Ff%R3C!b%jIx zDe$+r2ma3Oi9g{0!Vtt}_K(o{2Vv@*M@+GpVU3w5-^ca&`|Tda&JD~2MUDTTFm_`P zjP32e=n_RAjxojA*YAt5^8*;;+Ea)L6F7xHLSwO`k+EfKcR1OA4xHQ|Gvr!)Hyx2q zDHP}N$dz0FjSYNR;C?K=1+cVwkT{3~cJD83t#t}%h?o#I^EBG(+q&Joy2?3{XrU#% z-+s8egU*+qp4pS+NI|%29%6gK%4DNH#Z*iE2wtW1jGBR}x1+Zx4ToPe+GK}Z1;aiL z5`-~uc%V~Ug6t_&{sC&Je@yUC2)I7#pAz)AsDxx@zlDr9*b$3Cyx6XTzm%I&%&FoT z_z~eR;Sty8fxC_on#OON@bZRnT)e!+FA`qfSuclp#vAhTc)K)lPwa~_^eD$IQ3am+ z`t~EXAk$Xgy@i0``Wh}jcxyn9E4c~rE4*ZAdUaew;Hn3vU3jKJ9XR+%_V+!i%URj@-s|OkSM2;c9=KTN|5v8y{#8 z6?oDpmE>T%Z!CQk5tAEXuEWV>a z3TOWv#)slKr!JSLQI@_AQX(3PgUrMoH}SF)9WIK_>%vSY>}Vb1n>6Nc#K|l`;s+|^9QChu)YPf zigm(TlC0zPWz3OSiiE3&o|bis9P$lWmQtQ~>Sf}cQiLm4%1M$=`29TwoMmkp@?YYR zDVGc`!v18y-F$rZBW&pdq|Co!-xK@+!5=SLd1vE9{hEH{*yh4!?U+> zdWQ#(eAm#iwk+8J>)IW%lcLgO;@gBRCw}8%neprCxa4QzlFn8J4mMw+k$lUQuOPk@ zJ8a6g5ipJ09S1I&m-wbrXY7uN?j?*fo0_6xNSwl zrwstMd*iA&*w5aYgP&R2GVlQV8&Vu3E^{l2ehqA0=vhcvh?+Y}%@BN& zK-lK12;nsUUzxxWl~RGfdg0~L5RUwb!K&xZtNxJXWF!0+hJ-%y{bT(m354G724n+0 zcjfXICaH1rpmiK1qy9Vc26O`+W%mcoOB z?ALyY4yId{2YCR}i1qBNKa&tf4a-Sauj?s#$&z&9cP4(j@f$u3wbVvN%D}Hd-tcRX zcRqd($L|p->q@`w);@gT!j?zrN``S}@VK%yv--Qp)fZmJmUQq(Hr^aPYXsjt148EC zn|PYP{{eH%#dG}SJ?HrD|4Vax*IdPysC}~>JBu%4gJ0{Pr?91=xtaGy2H4Tmd*+l_ zwA%SeNXyB~m79}cu>PKP^NIG)gM*7H=9LTC>E&zn+8Q)Yt$}1`i%-SsAA)D`oWpaT z&r|XNMD}9e7k}B;t_9t1Y@h;wkqsur0!I>i-jIDI`x4`_Idu>OtF8gG4^9^GMco2D zB>E>Yu|INJ@Yak{!3x}$qSQ_JpTzr;&KK&R5$rhE++*7B5d1E|?-Bf4f`3Qg6Z|H? 
zM79KBzj^^j2=$*4j~sr~7nwJEI~D%9VCDSS5+8d2w>z2o2v8FG8ZWP{g}=gWdkRg# z$;5YtW_ zAC&7^D4)7ByaC5|>PT8n;De4Oun2g0ZrDw_wnJaWmp+i9TMk{9w&|v1eeLrbPMF$C zL)PRdYp#A`BNb+_S+VWCp@aA!94zMDj8cYwILg}>8J z+Pz+R4xCee4|CsfyK;9}y~Wh41V2VlCintDg@DH;^*X^D1aA_Q2!52{3IQ8vrzeH! zw}*t}bVTuELCwF#x`Bl;-ZNhm+JtWGF}F$}JpX?p6lEaK8^bW=<0P2Eq3U zzDGbRok;hI1~VFXk_GF)?7%c$cY{fmQktL#2fQZh0vJ2Tdn!`5yU80Ix$>1h575?SH;5K8c zU8Hmn5&1!+P)6=X%BQh)pj^dBNn|8_fNi)-Bd23~kXk^M({Z-ZjwkX9WdHOobTxOO zE4vF_*-0wgJyabOjtvH0Z?+24)w(ANdmsD6g~bl+iRMBSPNbp?TMMr>`Y`x`XP3O- zGOq9+h)5zzBXZPKLrCGleHk_mc+fJh?dHUG@qDg*X60761Y3swWW4a%TXUfo=naO* zLgj7b(INx?Jbd`*`MLIJ4jPj9>tTw1@O(}==s_q#PUY>VKdW5<;J8_UX1%f&7RZt( z3$M^O6|SwgJbjX;pe9`gwejU)ZJ{JTn8A7F2Y4^E?dnS5J+mN0*|wM03lpdGb<2N5 zc4R4V`QR4zBzyx`ZmDBTv|p~bv^tnUXcD4S*+?(t;~4#P18v14MMde>wd-(`)FuR4 z%Cx#ly= 5: + print('\nERROR: You cannot run script 5 without giving a genetic code! If all of the taxa in the run use the same genetic code, then use the --genetic_code argument (e.g. -g Universal). Otherwise, stop after script 4, fill out the spreadsheet called "gcode_translate.tsv," and then run scripts 5-7. If this does not make sense, please ask for help.') + quit() + + ten_digit_codes = [] + if args.first_script == 1 or args.script == 1: + for file in os.listdir(args.assembled_transcripts): + if file[10:] == '_assembledTranscripts.fasta': + ten_digit_codes.append(file[:10]) + else: + if not os.path.isdir(args.output + '/Output'): + print('\nERROR: A folder called "Output" is not found at the given output path. 
Enter the correct path for --output or start from script 1.\n') + quit() + + if(len(ten_digit_codes) > len(list(dict.fromkeys(ten_digit_codes)))): + print('\nERROR: Duplicate 10-digit codes are not allowed.\n') + quit() + + for code in ten_digit_codes: + for c, char in enumerate(code): + if (c != 2 and c != 5 and char not in 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890') or ((c == 2 or c == 5) and char != '_'): + print('\nERROR: ' + code + ' is an invalid 10-digit code sample identifier. It must of the format Op_me_hsap (Homo sapiens for example). Please ask for help if this does not make sense.\n') + quit() + + if os.path.isdir(args.output + '/Output') and (args.first_script == 1 or args.script == 1): + print('\nERROR: An "Output" folder already exists at the given path. Please delete or rename this folder and try again.\n') + quit() + elif os.path.isdir(args.output + '/Output/Intermediate'): + print('\nIt looks like this run is already complete. Try deleting/renaming the Output folder and try again.\n') + quit() + elif not os.path.isdir(args.output + '/Output'): + os.mkdir(args.output + '/Output') + + scripts = [0, script_one, script_two, script_three, script_four, script_five, script_six, script_seven] + + if args.script == -1: + if args.first_script < args.last_script: + for i in range(1 + args.last_script - args.first_script): + print('\nRunning script ' + str(i + args.first_script) + '...\n') + if i + args.first_script == 1: + if len(ten_digit_codes) == 0: + print('\nNo properly-named assembled transcripts files found.\n') + quit() + else: + scripts[i + args.first_script](args, ten_digit_codes) + else: + scripts[i + args.first_script](args) + else: + print('\nERROR: Invalid script combination: the first script must be less than the last script. 
If you want to use only once script, use the --script argument.\n') + quit() + else: + if args.script == 1: + if len(ten_digit_codes) == 0: + print('\nNo properly-named assembled transcripts files found.\n') + quit() + else: + scripts[args.script](args, ten_digit_codes) + else: + scripts[args.script](args) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/PTL1/Transcriptomes/Scripts/wrapper_submit.sh b/PTL1/Transcriptomes/Scripts/wrapper_submit.sh new file mode 100644 index 0000000..ed90f2f --- /dev/null +++ b/PTL1/Transcriptomes/Scripts/wrapper_submit.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +#SBATCH --job-name=PTL1_GBF +#SBATCH --output=PTL1.%j.out # Stdout (%j expands to jobId) +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --ntasks-per-node=64 ##change to number of srun when running multiple instances +#SBATCH --mem=160G +#SBATCH --mail-type=ALL +#SBATCH --mail-user=YOUREMAIL@smith.edu + +module purge #Cleans up any loaded modules +module use /gridapps/modules/all #make sure module locations is loaded + +module load slurm +module load tqdm +module load Biopython/1.75-foss-2019b-Python-3.7.4 +module load BLAST+/2.9.0-gompi-2019b +module load DIAMOND/0.9.30-GCC-8.3.0 +module load VSEARCH/2.21.1-GCC-10.3.0 + +parent='/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams/' + +srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}AssembledTranscripts -o ${parent} -n ${parent}Conspecific.txt --genetic_code Universal & +#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate4/Assembled_Transcripts -o ${parent}Plate4 -n ${parent}Plate4/Conspecific.txt --genetic_code ${parent}Plate4/Gcodes.txt & +#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate7/Assembled_Transcripts -o ${parent}Plate7 -n ${parent}Plate7/Conspecific.txt --genetic_code ${parent}Plate7/Gcodes.txt & +#srun -D 
${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}Plate11/Assembled_Transcripts -o ${parent}Plate11 -n ${parent}Plate11/Conspecific.txt --genetic_code ${parent}Plate11/Gcodes.txt & +#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate18/Assembled_Transcripts -o ${parent}Plate18 -n ${parent}Plate18/Conspecific.txt --genetic_code ${parent}Plate18/Gcodes.txt & +wait