From a559f61567a0cd9f3e3f5c19f5c4e0ee1507a39d Mon Sep 17 00:00:00 2001 From: Auden Cote-L'Heureux <52716489+AudenCote@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:28:04 -0500 Subject: [PATCH] Generalizing and adding examples to run_eukphylo.sh --- PTL2/run_eukphylo.sh | 64 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/PTL2/run_eukphylo.sh b/PTL2/run_eukphylo.sh index b5151ec..b63c485 100644 --- a/PTL2/run_eukphylo.sh +++ b/PTL2/run_eukphylo.sh @@ -1,18 +1,23 @@ -###GA 11/11/24 -###Updated run script to include grid and unity commands. -###The first block of code is specific to the grid. The second block is specific to unity. Pick one and delete the other. +## Last updated Jan 2025 by Auden Cote-L'Heureux + +## This shell script is used for running EukPhylo part 2, and includes a general setup for use on an HPC that uses +## the Slurm workload manager. It also includes several example run commands, which correspond to examples explained in more detail in the +## EukPhylo Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop). +## These run commands can also be copied and run in the terminal / command line separately, without a shell script. 
+ #!/bin/bash -#SBATCH --job-name=GA1 # Job name -#SBATCH --output=Run_phylotol.%j.out # Stdout (%j expands to jobId) -#SBATCH --mail-type=ALL -#SBATCH --mail-user=youremail@smith.edu ##add your email address +## SLURM-SPECIFIC SETUP BELOW - -###Grid start +#SBATCH --job-name=EukPhylo # Job name +#SBATCH --output=Run_EukPhylo.%j.out # Stdout (%j expands to jobId) #SBATCH --nodes=1 -#SBATCH --ntasks=10 ##change this to be double the number of task/batches you want to launch +#SBATCH --ntasks=10 ## On the Smith College HPC (Grid), we have to change this to be double the number of task/batches you want to launch +#SBATCH -c 24 # Number of Cores per Task +#SBATCH --mem=125G # Requested Memory +#SBATCH -q long # Partition. Only use this on certain HPCs (e.g., Unity at UMass). +#SBATCH -t 336:00:00 # Job time limit module purge #Cleans up any loaded modules module use /gridapps/modules/all #make sure module locations is loaded @@ -25,26 +30,23 @@ module load RAxML module load IQ-TREE/2.1.2-gompi-2021b module load tqdm/4.64.1-GCCcore-12.2.0 module load Python/3.9.6-GCCcore-11.2.0 -export PATH=$PATH:/beegfs/fast/katzlab/grid_phylotol_setup/programs/standard-RAxML-master -parent='/beegfs/fast/katzlab/' #add your path starting with the name of your folder, should begin with /beegfs/fast/katzlab/ -#if you are running batches, you need an srun line for each batch! 
-srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --similarity_filter --sim_cutoff 0.95 --sim_taxa sim_taxa.txt --blacklist GuidanceRemovedSeqs_allConservedRuns_ML_nov_dec_2023.txt --start raw --end trees --gf_list B1_listofOGs.txt --taxon_list taxon_list.txt --data OutgroupR2Gs --output ${parent}Output_folder_B1 > Output_folder_B1.out & -wait -###Grid end +export PATH=$PATH:/Path/To/Executable/Files + +parent='/Your/Home/Folder/' # The folder where you are running EukPhylo (this should contain the Scripts and input data folders) + +## RUN COMMANDS BELOW + +# A simple run of part 2, starting from ReadyToGo files and running through tree building +srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder + +# Another example run starting from ReadyToGo files and running through tree building, with the commonly used similarity filter cutoff, blacklist, and "sim_taxa_list" arguments (see Wiki) +srun --exact -n 1 -D ${parent} python3 ${parent}Scripts/eukphylo.py --start raw --end trees --gf_list ${parent}listofOGs.txt --taxon_list ${parent}taxon_list.txt --data ${parent}Input_folder --output ${parent}Output_folder --similarity_filter --blacklist ${parent}Blacklist.txt --sim_cutoff 0.99 --sim_taxa sim_taxa_list.txt + +# An example of running just the concatenation step of part 2, starting from trees +srun --exact -n 1 -D ${parent} python eukphylo.py --start trees --concatenate --concat_target_taxa Sr_rh --data Output + +# See the Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop) for more details!
+ + -###Unity start -#SBATCH -c 24 # Number of Cores per Task -#SBATCH --mem=125G # Requested Memory -#SBATCH -q long # Partition -#SBATCH -t 336:00:00 # Job time limit -module purge #Cleans up any loaded modules -module load miniconda/22.11.1-1 -module load mafft/7.481 -module load conda/latest -conda activate /work/pi_lkatz_smith_edu/Conda_PTL6p2/envs/PTL/ -parent='/work/pi_lkatz_smith_edu/' #add your path startin> -#if you are running batches, you need an srun line for each batch! -srun -D ${parent} python3 ${parent}Scripts/eukphylo.py --similarity_filter --sim_cutoff 0.99 ## This line is incomplete -wait -###Unity end