#!/usr/bin/perl

# Top-level setup for a Selecton submission: loads the required modules, reads
# the submitted form fields, asks BIOSEQUENCE_FUNCTIONS whether this user may
# start another run, and derives every path / file name used further down.

use CGI;
use CGI qw(:standard);
use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use strict;
use Storable;
use Bio::SeqIO;
# NOTE(review): only Bio::TreeIO is assumed to be the disabled import - confirm.
#use Bio::TreeIO;
use Bio::Tree::NodeI;
use Bio::Root::Root;
use Bio::Tree::TreeI;
use lib "/bioseq/bioSequence_scripts_and_constants";
use GENERAL_CONSTANTS;
use SELECTON_CONSTANTS;
use BIOSEQUENCE_FUNCTIONS;
use TREE_parser;
use lib "/bioseq/Selecton/external_scripts";
use codonAlign;

###### READING DATA FROM FORM - FIRST TO DO
# Direct method call; the indirect-object form ("new CGI") is parse-ambiguous.
my $queryForm = CGI->new;
my %FORM;    # hash with form information

#***********************************
### input file, ref seq. and email address
my $upload_unaligned_file_dna = $queryForm->param('userFILEunaligned');
my $upload_MSA_file_dna       = $queryForm->param('userFILEaligned');
$FORM{msa_SEQNAME}   = $queryForm->param("msa_SEQNAME");
$FORM{email_address} = $queryForm->param("email_add");
my $recipient = $FORM{email_address};

# check if the user which runs this run has not exceeded its maximal number of runs
my $user_ip = $ENV{'REMOTE_ADDR'};
BIOSEQUENCE_FUNCTIONS::check_if_user_is_allowed("selecton", $user_ip, $recipient);

#***************************************
### advanced options
$FORM{pdb_ID} = $queryForm->param("pdb_ID");
my $upload_PDB_file = $queryForm->param("pdb_FILE");
my $pdbUploadName   = "FILE";
$FORM{chain}            = $queryForm->param("chain");
$FORM{MODEL}            = $queryForm->param("MODEL");
$FORM{EMPIRICAL_MATRIX} = $queryForm->param("EMPIRICAL_MATRIX");    # adid added the mec model
my $empiricalMatrix  = $FORM{EMPIRICAL_MATRIX};
my $epsilonPrecision = $queryForm->param("PRECISION");              # added v2.2
# NOTE: if more genetic codes are added (or changed), the info should be edit in the "prepare_pipe" routine.
my $model = $FORM{MODEL};
# NOTE: if more models are added (or changed), the info should be edit in 2 more places, as text:
#   1. in the OUTPUT file, in "start_output_html" routine,
#   2. in the "prepare_pipe" routine.
my $method = "Bayesian";    ## ugly patch, since we removed ML: DO NOT REMOVE THIS LINE, it is necessary for colorCoding
#$FORM{DISTRIBUTION} = $queryForm->param("DISTRIBUTION");
$FORM{CATEGORIES} = $queryForm->param("CATEGORIES");
my $upload_TREE_file = $queryForm->param("tree_FILE");
$FORM{BL} = $queryForm->param("BL");
my $optimizeBL = "y";    # true if checked
# param() yields undef for a field that was not submitted; treat that exactly
# like an empty value instead of relying on undef comparing equal to "".
if (!defined $FORM{BL} || $FORM{BL} eq "") {
    $optimizeBL = "n";
}
$FORM{GENCODE} = $queryForm->param("GENCODE");
# NOTE: if more genetic codes are added (or changed), the info should be edit in the "prepare_pipe" routine.

#****************************************
#### TRANSLATE to LOWER case the PDB file and UPPER case the CHAIN
# lc/uc replace tr/[A-Z]/[a-z]/, whose brackets were literal characters rather
# than a character class. An undefined value is left undefined, as before.
$FORM{pdb_ID} = lc $FORM{pdb_ID} if defined $FORM{pdb_ID};
$FORM{chain}  = uc $FORM{chain}  if defined $FORM{chain};

#****************************************
#### set pdb_ID to FILE if uploading a file
if ($upload_PDB_file ne "") {
    $FORM{pdb_ID} = $pdbUploadName;
}

#**************************************
##### variables #######
my $querySeqFoundinMSA = "no";
my $query              = $FORM{msa_SEQNAME};

#**********************************
##### GENERAL PATHS
my $run_name = $^T;    # the running dir NAME old $$
my $ibis_external_scripts_path = "/bioseq/bioSequence_scripts_and_constants/";
my $WorkingDir        = GENERAL_CONSTANTS::SERVERS_RESULTS_DIR . "Selecton/" . $run_name . "/";
my $http_path         = GENERAL_CONSTANTS::SELECTON_URL;
my $job_canceled_page = $http_path . '/cancel_page.html';
my $WWWdir            = $http_path . "/results/" . $run_name . "/";
my $PdbPath           = GENERAL_CONSTANTS::PDB_DIVIDED;

#***************************************
### PROGRAMS OR PERL OR EXECUTABLES
my $extractPDBinfo  = $ibis_external_scripts_path . "extract_info_from_pdb.pl";
my $kill_job_script = "/cgi-bin/kill_process.cgi";
my $qsub_script     = $WorkingDir . "qsub.sh";
my $runClac_inQ     = "/bioseq/Selecton/selecton_run_calc.pl";
my $clustalw        = 'ssh bioseq@biocluster clustalw';    # '/usr/local/bin/clustalw';
my $muscle          = 'ssh bioseq@biocluster muscle';      # '/usr/local/bin/muscle';

#***************************************
### Sending e-mail from ibis
my $send_email_dir = GENERAL_CONSTANTS::SEND_EMAIL_DIR_IBIS;
my $smtp_server    = GENERAL_CONSTANTS::SMTP_SERVER;
my $userName       = GENERAL_CONSTANTS::ADMIN_USER_NAME;
my $userPass       = GENERAL_CONSTANTS::ADMIN_PASSWORD;
my $email_subject;
my $email_message;
my $email_system_return;

#***************************************
### output related paths
my $InpSeqFile  = $WorkingDir . "path.txt";       # file containing ls result of pdb
my $OutputURL   = $WWWdir . "output.html";        # link to output file
my $OutHtmlFile = $WorkingDir . "output.html";    # OUTPUT to the user
my $Logs_dir    = GENERAL_CONSTANTS::SERVERS_LOGS_DIR . "Selecton/";
my $OutLogFile  = $Logs_dir . $run_name . ".log";
my $QsubLogFile = $Logs_dir . $run_name . "_Q.log";

#**************************************
##### SPECIFIC TO THIS RUN variables
my $pid;                # to set the pid of the child
my $PdbFileDir = "";    # pdb file specific dir on path (2 letters) to check
my ($cgi_pid, $cmd);
my @sequences_names = ();    # This array will hold the names of the sequences, as it appears in the input file.
my $Qstat_No_file      = "QSTAT_NO";
my $estimated_run_time = "none";

#*****************************
### making absolute path file for ATEN pdb1ed5.ent.Z or pdbFILE.ent.Z if file was uploaded
# NOTE(review): unless a PDB file was uploaded, pdb_ID is the raw form value and
# is interpolated into the file names below; no validation is visible in this
# part of the file - confirm it is checked before any of these paths is opened.
my $PdbFileName    = "pdb" . $FORM{pdb_ID} . ".ent.gz";
my $PdbFileNameUnc = "pdb" . $FORM{pdb_ID} . ".ent";
my $PdbPrefix      = "pdb" . $FORM{pdb_ID};
if ($FORM{pdb_ID} eq "") {
    $PdbPrefix = "";
}

# file names in use only in case of PDB reading, for the use of the script $extractPDBinfo
my ($pdb_data) = ($PdbFileNameUnc) =~ /(\w+)/;    # extracting prefix, put it in first var
my $title_file = $pdb_data . ".title";
my $pdb_fasta  = $pdb_data . ".pdbfasta";
$pdb_data .= ".pdbdata";
my $pdb_to_fasta_error   = "pdb_to_fasta.error";
my $pdb_msa              = $PdbPrefix . "_PDB_MSA.pdbfasta";
my $clustal_outFile      = $PdbPrefix . "_PDB_MSA.out";    # the outfile for clustalw
my $clustal_aligned_file = $PdbPrefix . "_PDB_MSA.aln";

#*****************************
### FILE NAMES IN USE
my $dnaMSAprefix   = $PdbPrefix . "DNA";
my $aminoMSAprefix = $PdbPrefix . "AMINO";
my $fileUploadName_dna_unaligned = $dnaMSAprefix . "_unaligned" . ".txt";
my $fileDna_aligned              = $dnaMSAprefix . ".msa";
my $fileName_amino_aligned       = $aminoMSAprefix . ".msa";
my $fileUploadPath_dna_unaligned    = $WorkingDir . $fileUploadName_dna_unaligned;              # uploaded file from user - unaligned
my $copied_dna_unaligned            = $WorkingDir . "COPY_" . $fileUploadName_dna_unaligned;    # a copy of the same file
my $wwwfileUploadPath_dna_unaligned = $WWWdir . $fileUploadName_dna_unaligned;                  # www path : uploaded file from user - unaligned
my $fileDnaPath_aligned   = $WorkingDir . $fileDna_aligned;           # dna file after alignment
my $fileAminoPath_aligned = $WorkingDir . $fileName_amino_aligned;    # amino file after alignment

# Captures the two word characters that follow the first word character of
# pdb_ID. When the pattern does not match, $PdbFileDir becomes undef.
($PdbFileDir) = ($FORM{pdb_ID}) =~ /\w(\w{2})/;
my $PdbFilePath       = $PdbPath . $PdbFileDir . "/" . $PdbFileName;
my $codonAlignLogFile = $WorkingDir . "codonAlign.log";
my $treeUpload        = $WorkingDir . "userTree.txt";
my $copied_treeUpload = $WorkingDir . "COPY_userTree.txt";    # a copy of the same file
my $userTree          = $WWWdir . "userTree.txt";
my $userTree_copy     = $WWWdir . "COPY_userTree.txt";
my $PdbFile           = $PdbPrefix . ".ent";
my $sequences_names_file = "sequences.names";
my $runCalcInput         = "runCalcInput.txt";
my $FormInput            = "formInput.txt";
my $qsub_ans             = $WorkingDir . "qsub_ans.txt";
my $statistics_file      = SELECTON_CONSTANTS::STATISTICS_FILE;

#*****************************
### pdb related links
my $pdbUpload = $WorkingDir . $PdbFileNameUnc;    # name for an uploaded PDB FILE

#**********************************
### HTML definitions
my $ErrorDef = "ERROR! Selecton session has been terminated: \n";
my $SysErrorDef = "
SYSTEM ERROR - Selecton session has been terminated!
Please verify that there are no errors in your input file/s, and try to run Selecton again. Specifically, make sure your file is in the correct format, and refer to the FAQ for further assistance.
| |||||
Selecton is now processing your request.
Your job status is: Queued
The time that passed since submitting the query is: 00:00
Please note this may be a lengthy process and an email will be sent to the address you supplied once the calculation is finished.
This page will be automatically updated every 30 seconds. You can also reload it manually.
Once the job has finished, several links to the output files will appear below.
If you wish to view these results at a later time without recalculating them, please bookmark this page. The results will be kept on the server for three months.Running Parameters:
EndOfHTML

# Writes the list of running parameters to the OUTPUT handle.
# Every value printed here is either a submitted form field or the client-side
# name of an uploaded file, so it is HTML-escaped before being written into the
# page. The original interpolated these values unescaped.

# Stringify (upload parameters may be objects) and HTML-escape one value.
my $as_html = sub {
    my ($raw) = @_;
    return CGI::escapeHTML("$raw");
};

# Reduce a client-supplied upload path to the part after the last "\" or "/".
# A value without any separator is returned unchanged.
my $client_file_name = sub {
    my ($upload) = @_;
    my $path = "$upload";
    return $path =~ m/^.*(?:\\|\/)(.*)/ ? $1 : $path;
};

# Fixed description printed for each known model; unknown models print nothing.
my %model_description = (
    "M8"  => "Positive selection enabled (M8, beta + w >= 1)",
    "M8a" => "Null model: no positive selection(M8a, beta + w = 1)",
    "M7"  => "Null model: no positive selection(M7, beta)",
    "M5"  => "Positive selection enabled(M5, gamma)",
    "MEC" => "Mechanistic Empirical Combination Model (MEC)",
);

print OUTPUT "\n";
print OUTPUT "PDB ID = " . $as_html->($FORM{pdb_ID}) . "\n\n" if ($FORM{pdb_ID} ne "");
if ($upload_PDB_file ne "") {
    print OUTPUT "PDB file = " . $as_html->($client_file_name->($upload_PDB_file)) . "\n\n";
}
print OUTPUT "Chain identifier = " . $as_html->($FORM{chain}) . "\n\n" if ($FORM{chain} ne "");
if ($upload_unaligned_file_dna ne "") {
    print OUTPUT "DNA unaligned file = " . $as_html->($client_file_name->($upload_unaligned_file_dna)) . "\n\n";
}
if ($upload_MSA_file_dna ne "") {
    print OUTPUT "DNA MSA file = " . $as_html->($client_file_name->($upload_MSA_file_dna)) . "\n\n";
}
# The query sequence name is reported only when an unaligned file was uploaded.
print OUTPUT "Query sequence name in MSA file = " . $as_html->($FORM{msa_SEQNAME}) . "\n\n"
    if ($upload_unaligned_file_dna ne "");

print OUTPUT "Model = $model_description{$model}\n\n" if (exists $model_description{$model});
if ($model eq "MEC") {
    print OUTPUT "Amino-Acid empirical matrix to be expanded = " . $as_html->($empiricalMatrix) . "\n\n";
}
print OUTPUT "Number of categories = " . $as_html->($FORM{CATEGORIES}) . "\n\n" if ($FORM{CATEGORIES} ne "");
if ($upload_TREE_file ne "") {
    print OUTPUT "User tree file: " . $as_html->($client_file_name->($upload_TREE_file)) . "\n\n";
}
print OUTPUT "\n\n\n";

# $$ is a plain process id with no trailing newline, so no chomp is needed.
$cgi_pid = $$;
print OUTPUT "\n