diff --git a/PTL2/Dockerfile b/PTL2/Dockerfile
new file mode 100644
index 0000000..0deba38
--- /dev/null
+++ b/PTL2/Dockerfile
@@ -0,0 +1,64 @@
+# trimAl and VSEARCH are fetched below as pre-built x86-64 Linux binaries (the
+# IQ-TREE "Linux" archive is presumably x86-64 too - confirm), so the platform
+# is pinned to keep every tool runnable regardless of the build host.
+FROM --platform=linux/amd64 python:3.8.20-bullseye
+
+# System packages. unzip is listed explicitly because the trimAl step calls it;
+# the apt lists are removed in the same layer so they never reach the image.
+RUN apt-get update -y && apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        git \
+        mafft \
+        ncbi-blast+-legacy \
+        unzip \
+        vim \
+        wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# PhyloToL installation
+# Tested on: RUN git clone -b patch-1 https://github.com/DLBPointon/EukPhylo.git
+# Will work on below container after PR merge
+
+RUN git clone -b Docker https://github.com/Katzlab/EukPhylo.git
+
+# IQ-TREE installation (the archive is deleted in the layer that downloads it)
+RUN wget https://github.com/Cibiv/IQ-TREE/releases/download/v1.6.12/iqtree-1.6.12-Linux.tar.gz && \
+    tar xzf iqtree-1.6.12-Linux.tar.gz && \
+    rm iqtree-1.6.12-Linux.tar.gz && \
+    mv iqtree-1.6.12-Linux/ /usr/local/iqtree && \
+    ln -s /usr/local/iqtree/bin/iqtree /usr/local/bin/iqtree
+
+# Install python dependencies
+RUN pip install --no-cache-dir biopython==1.75 ete3==3.1.2 tqdm==4.66.4 six==1.17.0
+
+# VSEARCH installation (x86_64 build, matching the architecture of the trimAl binary)
+RUN wget https://github.com/torognes/vsearch/releases/download/v2.21.0/vsearch-2.21.0-linux-x86_64.tar.gz && \
+    tar xzf vsearch-2.21.0-linux-x86_64.tar.gz && \
+    rm vsearch-2.21.0-linux-x86_64.tar.gz && \
+    mv vsearch-2.21.0-linux-x86_64 /usr/local/vsearch && \
+    ln -s /usr/local/vsearch/bin/vsearch /usr/local/bin/vsearch
+
+# Guidance installation
+RUN mkdir guidance && \
+    wget https://taux.evolseq.net/guidance/static/download/guidance.v2.02.tar.gz && \
+    tar -xzvf guidance.v2.02.tar.gz -C guidance --no-same-owner && \
+    rm guidance.v2.02.tar.gz && \
+    cd guidance/guidance.v2.02 && make
+
+# Install pre-compiled version of Trimal 1.5.0
+RUN wget https://github.com/inab/trimal/releases/download/v1.5.0/trimAl_Linux_x86-64.zip && \
+    unzip trimAl_Linux_x86-64.zip && \
+    rm trimAl_Linux_x86-64.zip && \
+    mv trimAl_Linux_x86-64 /usr/local/trimal
+
+# Add executables to path. PATH entries must be directories, not the binaries
+# themselves. NOTE(review): assumes the trimal binary sits directly inside
+# /usr/local/trimal - confirm against the contents of the release zip.
+ENV PATH="$PATH:/usr/local/iqtree/bin:/usr/local/vsearch/bin:/usr/local/trimal:/usr/local/bin"
+
+# Change executable permissions on wrapper script
+RUN ["chmod", "+x", "/EukPhylo/PTL2/run_eukphylo.sh"]
+
+# Change entrypoint on execution of the container
+ENTRYPOINT ["bash","/EukPhylo/PTL2/run_eukphylo.sh"]
diff --git a/PTL2/run_eukphylo.sh b/PTL2/run_eukphylo.sh
index 6db85bd..234a86d 100644
--- a/PTL2/run_eukphylo.sh
+++ b/PTL2/run_eukphylo.sh
@@ -6,10 +6,19 @@
 ## EukPhylo Wiki (https://github.com/Katzlab/EukPhylo/wiki/EukPhylo-Part-2:-MSAs,-trees,-and-contamination-loop).
 ## These run commands can also be copied and run in the terminal / command line separately, without a shell script.
 
-
+# Locations inside the container: the cloned pipeline and the bind-mounted
+# input/output directories.
+parent='/EukPhylo/PTL2'
+out_dir='/Output_data'
+in_dir='/Input_data'
 
 ## EXAMPLE RUN COMMANDS BELOW
 
 # A simple run of part 2, starting from ReadyToGo files and running through tree building
-python3 Scripts/eukphylo.py --start raw --end trees --gf_list listofOGs.txt --taxon_list taxon_list.txt --data Input_folder --output Output_folder > Output.out
-
+python3 "${parent}/Scripts/eukphylo.py" \
+    --start raw \
+    --end trees \
+    --gf_list "${parent}/listofOGs.txt" \
+    --taxon_list "${parent}/taxon_list.txt" \
+    --data "${in_dir}" \
+    --output "${out_dir}" > Output.out
diff --git a/README.md b/README.md
index e6f502c..f063f52 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,30 @@
-
-**EukPhylo version 1.0** is an updated version of the PhyloToL pipeline from the [Katz Lab](https://www.science.smith.edu/katz-lab/) at Smith College. EukPhylo is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our [Wiki](https://github.com/Katzlab/EukPhylo/wiki) for more information on installation and usage!
+> :warning: **This is currently being dockerised!** Use the container at your own peril!
+
+# **EukPhylo version 1.0**
+EukPhylo is an updated version of the PhyloToL pipeline from the [Katz Lab](https://www.science.smith.edu/katz-lab/) at Smith College. EukPhylo is a phylogenomic toolkit for processing transcriptomic and genomic data that includes novel phylogeny-informed contamination removal techniques. See our [Wiki](https://github.com/Katzlab/EukPhylo/wiki) for more information on installation and usage!
+
+
+## Dockerfile
+
+The Docker image can be built and run with:
+
+```bash
+cd EukPhylo
+
+# Build the container (image names must be lowercase)
+docker build -f PTL2/Dockerfile . --tag myeuk:1
+
+# Get the container IMAGE_ID
+docker image list
+
+# Current command is:
+docker run -it \
+    --mount type=bind,src=$(pwd)/databases,dst=/Databases \
+    --mount type=bind,src=$(pwd)/input_data,dst=/Input_data \
+    --mount type=bind,src=$(pwd)/output_data,dst=/Output_data \
+    {IMAGE_ID}
+```
+
+After development, GitHub CICD workflows can be added to automatically build and release the Docker image for the end user.
 
 