=SLd1vE9{hEH{*yh4!?U+>
zdWQ#(eAm#iwk+8J>)IW%lcLgO;@gBRCw}8%neprCxa4QzlFn8J4mMw+k$lUQuOPk@
zJ8a6g5ipJ09S1I&m-wbrXY7uN?j?*fo0_6xNSwl
zrwstMd*iA&*w5aYgP&R2GVlQV8&Vu3E^{l2ehqA0=vhcvh?+Y}%@BN&
zK-lK12;nsUUzxxWl~RGfdg0~L5RUwb!K&xZtNxJXWF!0+hJ-%y{bT(m354G724n+0
zcjfXICaH1rpmiK1qy9Vc26O`+W%mcoOB
z?ALyY4yId{2YCR}i1qBNKa&tf4a-Sauj?s#$&z&9cP4(j@f$u3wbVvN%D}Hd-tcRX
zcRqd($L|p->q@`w);@gT!j?zrN``S}@VK%yv--Qp)fZmJmUQq(Hr^aPYXsjt148EC
zn|PYP{{eH%#dG}SJ?HrD|4Vax*IdPysC}~>JBu%4gJ0{Pr?91=xtaGy2H4Tmd*+l_
zwA%SeNXyB~m79}cu>PKP^NIG)gM*7H=9LTC>E&zn+8Q)Yt$}1`i%-SsAA)D`oWpaT
z&r|XNMD}9e7k}B;t_9t1Y@h;wkqsur0!I>i-jIDI`x4`_Idu>OtF8gG4^9^GMco2D
zB>E>Yu|INJ@Yak{!3x}$qSQ_JpTzr;&KK&R5$rhE++*7B5d1E|?-Bf4f`3Qg6Z|H?
zM79KBzj^^j2=$*4j~sr~7nwJEI~D%9VCDSS5+8d2w>z2o2v8FG8ZWP{g}=gWdkRg#
z$;5YtW_
zAC&7^D4)7ByaC5|>PT8n;De4Oun2g0ZrDw_wnJaWmp+i9TMk{9w&|v1eeLrbPMF$C
zL)PRdYp#A`BNb+_S+VWCp@aA!94zMDj8cYwILg}>8J
z+Pz+R4xCee4|CsfyK;9}y~Wh41V2VlCintDg@DH;^*X^D1aA_Q2!52{3IQ8vrzeH!
zw}*t}bVTuELCwF#x`Bl;-ZNhm+JtWGF}F$}JpX?p6lEaK8^bW=<0P2Eq3U
zzDGbRok;hI1~VFXk_GF)?7%c$cY{fmQktL#2fQZh0vJ2Tdn!`5yU80Ix$>1h575?SH;5K8c
zU8Hmn5&1!+P)6=X%BQh)pj^dBNn|8_fNi)-Bd23~kXk^M({Z-ZjwkX9WdHOobTxOO
zE4vF_*-0wgJyabOjtvH0Z?+24)w(ANdmsD6g~bl+iRMBSPNbp?TMMr>`Y`x`XP3O-
zGOq9+h)5zzBXZPKLrCGleHk_mc+fJh?dHUG@qDg*X60761Y3swWW4a%TXUfo=naO*
zLgj7b(INx?Jbd`*`MLIJ4jPj9>tTw1@O(}==s_q#PUY>VKdW5<;J8_UX1%f&7RZt(
z3$M^O6|SwgJbjX;pe9`gwejU)ZJ{JTn8A7F2Y4^E?dnS5J+mN0*|wM03lpdGb<2N5
zc4R4V`QR4zBzyx`ZmDBTv|p~bv^tnUXcD4S*+?(t;~4#P18v14MMde>wd-(`)FuR4
z%Cx#ly= 5:
+ print('\nERROR: You cannot run script 5 without giving a genetic code! If all of the taxa in the run use the same genetic code, then use the --genetic_code argument (e.g. -g Universal). Otherwise, stop after script 4, fill out the spreadsheet called "gcode_translate.tsv," and then run scripts 5-7. If this does not make sense, please ask for help.')
+ quit()
+
+ ten_digit_codes = []
+ if args.first_script == 1 or args.script == 1:
+ for file in os.listdir(args.assembled_transcripts):
+ if file[10:] == '_assembledTranscripts.fasta':
+ ten_digit_codes.append(file[:10])
+ else:
+ if not os.path.isdir(args.output + '/Output'):
+ print('\nERROR: A folder called "Output" is not found at the given output path. Enter the correct path for --output or start from script 1.\n')
+ quit()
+
+ if(len(ten_digit_codes) > len(list(dict.fromkeys(ten_digit_codes)))):
+ print('\nERROR: Duplicate 10-digit codes are not allowed.\n')
+ quit()
+
+ for code in ten_digit_codes:
+ for c, char in enumerate(code):
+ if (c != 2 and c != 5 and char not in 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890') or ((c == 2 or c == 5) and char != '_'):
+ print('\nERROR: ' + code + ' is an invalid 10-digit code sample identifier. It must of the format Op_me_hsap (Homo sapiens for example). Please ask for help if this does not make sense.\n')
+ quit()
+
+ if os.path.isdir(args.output + '/Output') and (args.first_script == 1 or args.script == 1):
+ print('\nERROR: An "Output" folder already exists at the given path. Please delete or rename this folder and try again.\n')
+ quit()
+ elif os.path.isdir(args.output + '/Output/Intermediate'):
+ print('\nIt looks like this run is already complete. Try deleting/renaming the Output folder and try again.\n')
+ quit()
+ elif not os.path.isdir(args.output + '/Output'):
+ os.mkdir(args.output + '/Output')
+
+ scripts = [0, script_one, script_two, script_three, script_four, script_five, script_six, script_seven]
+
+ if args.script == -1:
+ if args.first_script < args.last_script:
+ for i in range(1 + args.last_script - args.first_script):
+ print('\nRunning script ' + str(i + args.first_script) + '...\n')
+ if i + args.first_script == 1:
+ if len(ten_digit_codes) == 0:
+ print('\nNo properly-named assembled transcripts files found.\n')
+ quit()
+ else:
+ scripts[i + args.first_script](args, ten_digit_codes)
+ else:
+ scripts[i + args.first_script](args)
+ else:
+ print('\nERROR: Invalid script combination: the first script must be less than the last script. If you want to use only once script, use the --script argument.\n')
+ quit()
+ else:
+ if args.script == 1:
+ if len(ten_digit_codes) == 0:
+ print('\nNo properly-named assembled transcripts files found.\n')
+ quit()
+ else:
+ scripts[args.script](args, ten_digit_codes)
+ else:
+ scripts[args.script](args)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/PTL1/Transcriptomes/Scripts/wrapper_submit.sh b/PTL1/Transcriptomes/Scripts/wrapper_submit.sh
new file mode 100644
index 0000000..ed90f2f
--- /dev/null
+++ b/PTL1/Transcriptomes/Scripts/wrapper_submit.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Slurm batch job: load the required toolchain modules, then run wrapper.py through srun.
+#SBATCH --job-name=PTL1_GBF
+#SBATCH --output=PTL1.%j.out # Stdout (%j expands to the job ID)
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --ntasks-per-node=64 ## Change to the number of srun commands when running multiple instances
+#SBATCH --mem=160G
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=YOUREMAIL@smith.edu
+
+module purge # Unload any modules that are already loaded
+module use /gridapps/modules/all # Make sure the module location is on the module search path
+
+module load slurm
+module load tqdm
+module load Biopython/1.75-foss-2019b-Python-3.7.4
+module load BLAST+/2.9.0-gompi-2019b
+module load DIAMOND/0.9.30-GCC-8.3.0
+module load VSEARCH/2.21.1-GCC-10.3.0
+
+parent='/beegfs/fast/katzlab/becky/PTL1/Transcriptomes/Forams/' # Run directory; keep the trailing slash, paths below are built as ${parent}Name
+
+srun -D ${parent}Scripts python3 ${parent}Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}AssembledTranscripts -o ${parent} -n ${parent}Conspecific.txt --genetic_code Universal & # Runs in the background; the final wait collects it
+#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate4/Assembled_Transcripts -o ${parent}Plate4 -n ${parent}Plate4/Conspecific.txt --genetic_code ${parent}Plate4/Gcodes.txt &
+#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate7/Assembled_Transcripts -o ${parent}Plate7 -n ${parent}Plate7/Conspecific.txt --genetic_code ${parent}Plate7/Gcodes.txt &
+#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 1 -2 7 -x --assembled_transcripts ${parent}Plate11/Assembled_Transcripts -o ${parent}Plate11 -n ${parent}Plate11/Conspecific.txt --genetic_code ${parent}Plate11/Gcodes.txt &
+#srun -D ${parent}HQ/Scripts python3 ${parent}HQ/Scripts/wrapper.py -1 2 -2 7 -x --assembled_transcripts ${parent}Plate18/Assembled_Transcripts -o ${parent}Plate18 -n ${parent}Plate18/Conspecific.txt --genetic_code ${parent}Plate18/Gcodes.txt &
+wait # Block until every backgrounded srun step has exited, so the job does not end early