#!/usr/local/bin/perl
#
# Selecton calculation driver.
#
# Usage: <this script> WORKING_DIR RUN_CALC_INPUT FORM_INPUT
#   WORKING_DIR     directory prefix of the run (used by plain concatenation,
#                   so it is expected to end with a path separator)
#   RUN_CALC_INPUT  name (inside WORKING_DIR) of the Storable file with the run data
#   FORM_INPUT      name (inside WORKING_DIR) of the Storable file with the form data
#
# Flow: read the stored input, mark the job as "Running" in the output HTML,
# run the selecton/mec executable and the colour-coding scripts, write copies
# of the alignments with the original sequence names, optionally build the
# "pipe" PDB file, append the result links to the output HTML, mark the page
# as finished, record the run time and e-mail the user.
#
# NOTE(review): many string literals below span several physical lines and
# contain blank lines; presumably HTML markup was lost from this copy of the
# file. They are kept exactly as found - verify against the deployed version.

use strict;
use Storable;
use lib "/bioseq/bioSequence_scripts_and_constants/"; #"/db1/System/bioseq/scripts_for_servers";
use GENERAL_CONSTANTS;
use BIOSEQUENCE_FUNCTIONS;
use SELECTON_CONSTANTS;
use lib "/bioseq/Selecton/external_scripts";
use pipe_for_selecton;

my $WorkingDir = shift;
my $runCalcInput = $WorkingDir.shift;
my $formInput = $WorkingDir.shift;

# hashes to retrieve the runs' input from the storable element
my %FORM = ();
my %run_data = ();

# VARS FROM INPUT FILE
# vars read from form:
my ($epsilonPrecision, $query_seq_name_to_run, $upload_TREE_file, $optimizeBL);
# General vars
my ($OutLogFile, $querySeqFoundinMSA, $tree_faq, $PdbPrefix, $method, $run_name, $PdbFileNameUnc, $was_Pdb_uploaded, $proc_comm, $estimated_run_time, $begin_Q_runtime);
# HTML related vars
my ($SysErrorDef, $ContactDef, $WWWdir, $ErrorDef, $OutputURL);
# files which were created by the cgi
my ($fileDna_aligned, $treeUpload, $OutHtmlFile, $cgi_log_file, $sequences_names_file, $fileName_amino_aligned, $upload_unaligned_file_dna, $upload_MSA_file_dna, $clustal_aligned_file, $pdb_data);
my @sequences_names = (); # This array will hold the names of the sequences, as it appears in the input file.
my $qsub_ans_file = $WorkingDir."qsub_ans.txt"; # The flag file which is read by the daemon process
my $finish_flag = $WorkingDir."END_OK"; # The file which denotes that the run was finished.

&readInput();

my $http_path = GENERAL_CONSTANTS::SELECTON_URL;

### Sending e-mail from the cluster
my $smtp_server = GENERAL_CONSTANTS::SMTP_SERVER;
my $userName = GENERAL_CONSTANTS::ADMIN_USER_NAME;
my $userPass = GENERAL_CONSTANTS::ADMIN_PASSWORD;
my $mail = "mailto:".GENERAL_CONSTANTS::ADMIN_EMAIL."?subject=Selecton%20Run%20No:%20$run_name";
my $email_subject;
my $email_message;
my $email_system_return;
my $send_email_dir = GENERAL_CONSTANTS::SEND_EMAIL_DIR;

# VARS DEFINED IN THE SCRIPT:
# scripts or executables
my $biocluster_external_scripts_path = "/bioseq/Selecton/external_scripts/";
#my $selecton = "/bioseq/pupkoSVN/trunk/programs/selecton/selecton"; #"/d/bioinfo/users/adist/pupkoSVN/trunk/programs/selecton/selecton";
my $selecton = "/bioseq/Selecton/selecton.exe";
#my $selecton = $biocluster_external_scripts_path."srcSelecton/srcV2.2/selecton";
#my $mecSelecton = "/bioseq/pupkoSVN/trunk/programs/mec/mec";
my $mecSelecton = "/bioseq/Selecton/mec.exe"; #"/d/bioinfo/users/adist/pupkoSVN/trunk/programs/mec/mec"; # the exe of the kaks using the mec model is added by adid (29.1.07)
my $colorCoding = $biocluster_external_scripts_path . "colorCoding.v2.pl";
my $colorCodingLinear = $biocluster_external_scripts_path . "colorCodingLinear.pl";
my $statTest = "/cgi-bin/statTest.cgi"; # REMARK : SHOULD BE RE-WRITTEN AND RUN FROM THE CLUSTER

# vars
my $pdbUpload = $WorkingDir . $PdbFileNameUnc; #name for an uploaded PDB FILE
my $significance_test_faq = "/overview.html#meth5";

# files which will be created by this script
my $rsml = "rasmol.txt";#"colors.txt";
my $areTherePositiveSites = $WorkingDir."areSitesPositive.txt";
my $colorsLinear = "colors.html"; #results color-coded onto the linear sequence
my $outputScoreFile="kaks.res";
my $selection4Site_file = $WorkingDir."selection4Site.txt";
my $kaks_file = $WorkingDir."kaks.res";
my $params = "globalResult.txt";
my $log = "kaks4site.log";
my $final_out = $PdbPrefix . ".gradesPE";
my $finalDNAFile_seq_names = "DNA.names.msa";
my $finalAminoFile_seq_names = "AMINO.names.msa";
my $tree_out = "kaks4site.tree";
my $rasmol_file = $PdbPrefix .".rsml";
my $statistics_file = SELECTON_CONSTANTS::STATISTICS_FILE;

#*****************************
### FGiJ path and related files
my $FGiJ_path = "/fgij/";
my $FGiJ_pipe_pdb = $FORM{pdb_ID} . "_selecton" . $run_name . "_pipe.pdb";
my $FGiJ_link = $FGiJ_path . "fg.htm?mol=/results/" . $run_name . "/" . $FGiJ_pipe_pdb;
my $pipe_error = "pipe.error";

#---------------------------------------------------------------------------
# C A L C U L A T I O N
#---------------------------------------------------------------------------
$begin_Q_runtime = &printTime();
open LOG, ">".$OutLogFile;
print LOG &printTime();
print LOG "\nEntered selecton_run_calc.pl\nUpdating the HTML with running status\n";

# updating the HTML status to "running"
my $ans = &GENERAL_CONSTANTS::print_Q_status_in_html($OutHtmlFile, "Running", "no", $estimated_run_time);
print LOG $ans if ($ans ne "OK");

&run_calc();

### changing back the names in the DNA and AMINO files, so it will hold the original sequnces names.
&return_seq_names_to_files($fileName_amino_aligned, $finalAminoFile_seq_names, \@sequences_names);
&return_seq_names_to_files($fileDna_aligned, $finalDNAFile_seq_names, \@sequences_names);

#if a tree was created, we output it to the users, than first have to write to it original names
#if ((-e $WorkingDir.$tree_out) && !(-z $WorkingDir.$tree_out)){
#    print LOG "renaming numbered tree output $tree_out to : no_tree.txt\n";
#    my $cmd = "mv ".$WorkingDir.$tree_out." ".$WorkingDir."no_tree.txt";
#    my $out = `$cmd`;
#    print LOG "moving returned: $out\n";
#    chmod 0744, $WorkingDir."no_tree.txt";
#    print LOG "going to run change_tree_file_names\n";
#    #&change_tree_file_names(\@sequences_names, $WorkingDir."no_tree.txt", $WorkingDir.$tree_out);
#    print LOG "after change_tree_file_names\n";
#}

#------------
### create a pdb file with pipe in its header, for FGiJ to read
if ($was_Pdb_uploaded eq "yes"){
    print LOG "Going to prepare pipe file\n";
    &prepare_pipe(\@sequences_names);
}
else{
    print LOG "Not creating pipe, since the value of \$was_Pdb_uploaded is: \"$was_Pdb_uploaded\"\n";
}

### PRINTING output FINAL NOTES AND LINKS
open OUTPUT, ">>" .$OutHtmlFile;
flock OUTPUT,2;
print OUTPUT "\n

Selecton calculation is FINISHED

\n";
print OUTPUT "

Final Result:

";
if ($was_Pdb_uploaded eq "yes") { #3D
    print OUTPUT "\n

Graphical display of Selecton results with FirstGlance in Jmol

\n";
}
else {
    print OUTPUT "\n

View Color Coded Selecton Results

\n";
}

# check if there are P.S. sites. If yes - a button for statistical testing
open POS, "<$areTherePositiveSites";
my @pos = <POS>;
chomp @pos;
close POS;
if ($pos[0] eq "yes"){
    print OUTPUT "Positively selected sites found.
\n";
    ###### here is a link for statistical testing for the M8 and MEC models
    if (($FORM{MODEL} eq "M8") || ($FORM{MODEL} eq "MEC")) {
        my $ans = &add_data_to_input_file;
        # in case the data was not added - we don't create a submit button, we let the user know he can contact us
        if ($ans eq "OK"){
            print OUTPUT "\n
\n";
            print OUTPUT "
\n";
            print OUTPUT "\n";
            print OUTPUT "\n";
            print OUTPUT "\n";
            print OUTPUT "\n";
            print OUTPUT "This will run your data with a null model of evolution

\n";
        }
        else{
            print OUTPUT "Please contact us if you wish to run a statistical test for your results, and mention this number: $run_name
";
        }
    }
}
else {
    print OUTPUT "No positively selected sites found in the protein.
\n";
}
print OUTPUT "

Output Files:

\n\n";

### print the links
if ($was_Pdb_uploaded eq "yes") { #if user ran selecton with a PDB struct.: Ka/Ks scores 2gether with color coding
    print OUTPUT "

Codon Ka/Ks scores color-coded on the linear sequence

\n";
}
print OUTPUT "

Codon Ka/Ks scores (numerical values)";
# add the gapped output only in case there was gapped output (if the files are identical, the only difference will be in the line 'Displayed on sequence 1< including gaps>'
if ((-s $WorkingDir."kaks.res.gaps") - (-s $WorkingDir.$outputScoreFile) > 15){
    print OUTPUT " - Reference sequence only

\n";
    print OUTPUT "

Codon Ka/Ks scores (numerical values) - For each position in the MSA, including gaps

\n";
}
else{
    print OUTPUT "

\n";
}
if ( $upload_unaligned_file_dna ne "no"){ # print amino aln only if the user supplied a non-aligned file (then we run codon-align & produce an amino aln)
    print OUTPUT "

Amino Acid Multiple Sequence Alignment (in Fasta format)\n";
}
print OUTPUT "

Codon Multiple Sequence Alignment (in Fasta format)\n";
if ($upload_TREE_file eq "NOT_GIVEN") { # no user tree supplied - NJ tree created
    print OUTPUT "

Phylogenetic Tree\n";
}
print OUTPUT "

Likelihood and parameters of Selecton run\n";
print OUTPUT "

Log-file of Selecton run\n";
if ($was_Pdb_uploaded eq "yes") { #if user ran selecton with a PDB struct.
    print OUTPUT "

RasMol coloring script source

\n";
    print OUTPUT "

PDB file updated with Selecton results in its header

\n";
}
print OUTPUT "\n

Please report any problem in case of need.

\n";
flock OUTPUT,8;
close OUTPUT;

### write to the output that the job has finished
open OUTPUT, "<$OutHtmlFile";
flock OUTPUT,2;
my @output = <OUTPUT>;
flock OUTPUT,8;
close OUTPUT;
open OUTPUT, ">$OutHtmlFile";
flock OUTPUT,2;
foreach my $line (@output){
    if ($line =~ /Selecton Job Status Page/i){ #finds the phrase "Selecton" job status page, case-insensitive
        print OUTPUT "

Selecton Job Status Page - FINISHED

\n";
        print OUTPUT "

Go to the results

\n";
    }
    else {
        print OUTPUT $line;
    }
}
flock OUTPUT,8;
close OUTPUT;

### stop the automatic reload
system 'echo "(cd '.$WorkingDir.' ; chmod -R og=rx * )" | /bin/tcsh';
&stop_reload;

# reporting the statistics file on a succssful ending
my $total_runtime = BIOSEQUENCE_FUNCTIONS::subtract_time_from_now($begin_Q_runtime);
open STATISTICS, ">>".$statistics_file;
flock STATISTICS, 2;
print STATISTICS "$run_name total runTime: $total_runtime\n";
flock STATISTICS, 8;
close STATISTICS;

$email_subject = "Your Selecton results for run number $run_name are ready";
$email_message = "Selecton finished calculation. Please click on the following link to view the results:\n$WWWdir"."output.html\nPlease note: the results will be kept on the server for three months.";
open LOG, ">>$OutLogFile";
print LOG "\nSending mail to user.\n";
GENERAL_CONSTANTS::send_mail("Selecton", $FORM{email_address}, $run_name, $email_subject, $email_message);
if (-e $WorkingDir."core"){
    print LOG "remove core file from working directory\n";
    unlink $WorkingDir."core";
}
print LOG "\nSelecton run completed successfully!";
print LOG "\n************** END OF LOG FILE *****************\n";
close LOG;
exit;

#----------------------------------------------------------------------------------------
# S U B R O U T I N E S
#----------------------------------------------------------------------------------------

# readInput()
# Loads the two Storable files ($runCalcInput, $formInput) into %run_data and
# %FORM, copies the run settings into the file-level variables, writes "OK"
# to $qsub_ans_file and fills @sequences_names from the sequence-names file
# (lines of the form "<number> <name>").
# Calls sys_error_exit() when the sequence-names file cannot be opened.
sub readInput{
    # ------ storable on -------
    my $input_data = retrieve($runCalcInput);
    %run_data = %$input_data;
    my $FORM_data = retrieve($formInput);
    %FORM = %$FORM_data;

    $run_name = $run_data{run_name};
    $WorkingDir = $run_data{WorkingDir};
    $WWWdir = $run_data{WWWdir};
    $epsilonPrecision = $run_data{epsilonPrecision};
    $query_seq_name_to_run = $run_data{query_seq_name_to_run};
    $optimizeBL = $run_data{optimizeBL};
    $querySeqFoundinMSA = $run_data{querySeqFoundinMSA};
    $tree_faq = $run_data{tree_faq};
    $method = $run_data{method};
    $SysErrorDef = $run_data{SysErrorDef};
    $ErrorDef = $run_data{ErrorDef};
    $ContactDef = $run_data{ContactDef};
    $fileDna_aligned = $run_data{fileDna_aligned};
    $treeUpload = $run_data{treeUpload};
    $OutHtmlFile = $run_data{OutHtmlFile};
    $cgi_log_file = $run_data{cgi_log_file};
    $OutLogFile = $run_data{OutLogFile};
    $fileName_amino_aligned = $run_data{fileName_amino_aligned};
    $OutputURL = $run_data{OutputURL};
    $estimated_run_time = $run_data{estimated_run_time};
    $sequences_names_file = $run_data{sequences_names_file};
    $was_Pdb_uploaded = $run_data{was_Pdb_uploaded};

    # These five fields used to be written as
    #     (COND) ? $var = "" : $var = $value;
    # Because ?: binds tighter than =, Perl parses that as
    #     ((COND) ? ($var = "") : $var) = $value;
    # so the variable always received the stored value, "NOT_GIVEN" included.
    # The rest of the script relies on that (it compares $upload_TREE_file
    # with "NOT_GIVEN" and passes the other fields as positional command-line
    # arguments), so the effective behaviour is kept and written plainly.
    $upload_TREE_file = $run_data{upload_TREE_file};
    $PdbFileNameUnc = $run_data{PdbFileNameUnc};
    $PdbPrefix = $run_data{PdbPrefix};
    $pdb_data = $run_data{pdb_data};
    $clustal_aligned_file = $run_data{clustal_aligned_file};

    $upload_MSA_file_dna = $run_data{upload_MSA_file_dna};
    $upload_unaligned_file_dna = $run_data{upload_unaligned_file_dna};
    # ------ storable on -------

    # ------ storable off -------
    #unless(open INPUT, $runCalcInput){
    #    open ANS, ">".$qsub_ans_file;
    #    print ANS "NOT_OK";
    #    close ANS;
    #    chmod 0755, $qsub_ans_file;
    #    exit;
    #}
    #while(<INPUT>){
    #    chomp;
    #if(/RUN NAME: (.+)/) {$run_name = $1;}
    #elsif(/WORKING DIR: (.+)/) {$WorkingDir = $1;}
    #elsif(/WWW DIR: (.+)/){$WWWdir = $1;}
    #elsif(/PRECISION LEVEL: (.+)/) {$epsilonPrecision = $1;}
    #elsif(/EVOLUTONARY MODEL: (.+)/){$FORM{MODEL} = $1;}
    #elsif(/EMPIRICAL MATRIX: (.+)/) {#ONLY IF IT IS MEC MODEL
    #($1 eq "NOT_GIVEN") ? $FORM{EMPIRICAL_MATRIX} = "" : $FORM{EMPIRICAL_MATRIX} = $1;}
    #elsif(/QUERY NAME TO RUN: (.+)/){$query_seq_name_to_run = $1;}
    #elsif(/DISTRIBUTE CATEGORIES: (.+)/){$FORM{CATEGORIES} = $1;}
    #elsif(/TREE_WAS_UPLOADED\?: (.+)/){#REMARK: CHANGE THIS VAR'S CONTENT IN THE REST OF THE SCRIPT TO TRUE/FALSE.
    #($1 eq "NOT_GIVEN") ? $upload_TREE_file = "" : $upload_TREE_file = $1;}
    #$upload_TREE_file = $1;}
    #elsif(/OPTIMIZE BRANCH LENGTH\? (.+)/){$optimizeBL = $1;}
    #elsif(/GENETIC CODE: (.+)/){$FORM{GENCODE} = $1;}
    #elsif(/GIVEN QUERY NAME: (.+)/){$FORM{msa_SEQNAME} = $1;}
    #elsif(/PDB ID: (.+)/){#if given
    #    ($1 eq "NOT_GIVEN") ? $FORM{pdb_ID} = "" : $FORM{pdb_ID} = $1;}
    #elsif(/PDB NAME: (.+)/){
    #    ($1 eq "NOT_GIVEN") ? $PdbFileNameUnc = "" : $PdbFileNameUnc = $1;}
    ##elsif(/PDB CHAIN: (.+)/){
    ##    ($1 eq "NOT_GIVEN") ? $FORM{chain} = "" : $FORM{chain} = $1;}
    #elsif(/PDB PREFIX: (.+)/){
    #    ($1 eq "NOT_GIVEN") ? $PdbPrefix = "" : $PdbPrefix = $1;}
    #elsif(/PDB DATA FILE: (.+)/){
    #    ($1 eq "NOT_GIVEN") ? $pdb_data = "" : $pdb_data = $1;}
    #elsif(/CLUSTAL ALN: (.+)/){
    #    ($1 eq "NOT_GIVEN") ? $clustal_aligned_file = "" : $clustal_aligned_file = $1;}
    #elsif(/FOUND QUERY IN MSA\?: (.+)/){$querySeqFoundinMSA = $1;}
    #elsif(/TREE FAQ: (.+)/){$tree_faq = $1;}
    #elsif(/METHOD: (.+)/){$method = $1;}
    #elsif(/USER EMAIL: (.+)/){
    #    ($1 eq "NOT_GIVEN") ? $FORM{email_address} = "" : $FORM{email_address} = $1;}
    #elsif(/SYS ERROR: (.+)/){$SysErrorDef = $1;}
    #elsif(/ERROR DEF: (.+)/){$ErrorDef = $1;}
    #elsif(/CONTACT DEFINITION: (.+)/){$ContactDef = $1;}
    #elsif(/WAS PDB UPLOADED\?: (.+)/){$was_Pdb_uploaded = $1;}
    #elsif(/DNA FILE NAME: (.+)/){$fileDna_aligned = $1;}
    #elsif(/UPLOADED TREE PATH: (.+)/){$treeUpload = $1;}
    #elsif(/OUTPUT HTML PATH: (.+)/){$OutHtmlFile = $1;}
    #elsif(/LOG PATH: (.+)/){$cgi_log_file = $1;}
    #elsif(/QSUB LOG: (.+)/){$OutLogFile = $1;}
    #elsif(/SEQ NAMES FILE: (.+)/){$sequences_names_file = $1;}
    #elsif(/AMINO FILE NAME: (.+)/){$fileName_amino_aligned = $1;}
    #elsif(/WAS A DNA UNALIGNED FILE UPLOADED\?: (.+)/){$upload_unaligned_file_dna = $1;}
    #elsif(/WAS A DNA ALIGNED FILE UPLOADED\?: (.+)/){$upload_MSA_file_dna = $1;}
    #elsif(/URL OUTPUT: (.+)/){$OutputURL = $1;}
    #elsif(/ESTIM RUNTIME: (.+)/){$estimated_run_time= $1;}
    #}
    #close INPUT;
    # ------ storable off -------

    # if reading was OK, we report it, for the daemon
    open ANS, ">".$qsub_ans_file;
    print ANS "OK";
    close ANS;
    chmod 0755, $qsub_ans_file;

    # recreating the sequences array of the DNA sequences names
    unless (open SEQ_NAMES, $WorkingDir.$sequences_names_file){
        &sys_error_exit("cannot open the file ".$WorkingDir.$sequences_names_file." for reading $!\n");
    }
    while(<SEQ_NAMES>){
        chomp;
        # slot 0 is reset on every line, exactly as the original loop did
        $sequences_names[0] = "";
        if(/(\d+) (.+)/){
            $sequences_names[$1] = $2;
        }
    }
    close SEQ_NAMES;
}

#########################################################################################
# CALCULATION AND POST-PROCESSING
#
# run_calc()
# Runs the Ka/Ks calculation (run_kaks4site), verifies that both result files
# exist and are non-empty, then runs the colour-coding scripts through tcsh:
# $colorCoding only when a PDB was uploaded, $colorCodingLinear always.
# A file named "error" in the working directory after either script is handed
# to read_colors_error_and_exit.
sub run_calc {
    &run_kaks4site;

    # The next routine assumes these files were created, therefore first we check that it was created
    unless ((-e $selection4Site_file) && !(-z $selection4Site_file) && (-e $kaks_file) && !(-z $kaks_file)) {
        &sys_error_exit("run_calc: The file $selection4Site_file or $kaks_file was not created (or contains no data) during the run of selecton.v2.2. 
Cannot process outputs");
    }

    #print LOG "run_calc : going to run routine change_colors_if_significant\n";
    #&change_colors_if_significant;

    if ($was_Pdb_uploaded eq "yes") { #if user ran selecton with a PDB struct.
        ## run the script colorCoding.pl to produce the output files
        print LOG "run_calc : touch $final_out\n";
        $proc_comm = "perl $colorCoding $method \'$query_seq_name_to_run\' $WorkingDir $selection4Site_file $clustal_aligned_file $pdb_data $kaks_file $final_out $PdbFileNameUnc $fileName_amino_aligned $rsml $params";
        print LOG "run_calc: running $proc_comm\n";
        system 'echo "(cd '.$WorkingDir.';touch '.$final_out.'; chmod oug+rx '.$final_out.')" | /bin/tcsh';
        system 'echo "(cd '.$WorkingDir.'; '.$proc_comm.')" | /bin/tcsh';

        # check if the script $colorCoding found an error
        if (-e $WorkingDir."error"){
            &read_colors_error_and_exit;
        }

        # REMARK: I don't think it is necesseray in the new server, since we wont use PE
        ##### copy final PE files to pdbspt dir and compress the PDB file
        #my $string1 = "cd $WorkingDir";
        #my $string2 = "cp consurf.spt pdbspt/consurf.spt";
        #my $string3 = "gzip -c $PdbFileNameUnc > pdbspt/pdbfile.ent";
        #my $string4 = "mv consurf.spt colors.txt";
        #my $string6 = "mv $rasmol_file rasmol.txt";
        #my $string5 = "chmod ogu+rx pdbspt/*";
        #
        #print LOG "\nrun_calc: Copy the final PE files to pdbspt dir\n";
        #
        #system 'echo "('.$string1.'; '.$string2.'; '.$string3.'; '.$string4.'; '.$string6.';' .$string5.';)" | /bin/tcsh';
    }

    $proc_comm = "perl $colorCodingLinear $run_name $WorkingDir $colorsLinear $selection4Site_file $areTherePositiveSites";
    print LOG "\nrun_calc: running $proc_comm \n";
    system 'echo "(cd '.$WorkingDir.'; '.$proc_comm.')" | /bin/tcsh';

    # check if the script $colorCodingLinear found an error
    if (-e $WorkingDir."error"){
        &read_colors_error_and_exit;
    }
}

######################################################################################
# run kaks4site.exe
#
# run_kaks4site()
# Builds the selecton (or, for the MEC model, mec) command line from the form
# and run settings, runs it inside $WorkingDir, then scans kaks4site.log for
# known error lines. A "USER ..." line produces a warning in the output HTML;
# tree, format, illegal-character and line-length errors end the run through
# print_to_output_and_exit. A missing log file ends it through sys_error_exit.
sub run_kaks4site {
    ###### should add verification that two refuting arguments aren't given here...
    # my $selecton_comm="$selecton -c \'$fileDna_aligned\'"; #default run: bayesian, beta+w>1 (M8) , w=8, ref=1st seq, std nuc.code, NJ tree
    my $selecton_comm="$selecton -i $WorkingDir" . $fileDna_aligned . " -e" . $epsilonPrecision; #default run: bayesian, beta+w>1 (M8) , w=8, ref=1st seq, std nuc.code, NJ treeepsilon by default set to 0.1

    if ($FORM{MODEL} eq "MEC") { #if the model is MEC call another exe
        $selecton_comm = "$mecSelecton -i $WorkingDir" . "$fileDna_aligned";
        # if no z is given mecSelecton will run by default with JTT
        my %z_value_of = (JTT => 0, WAG => 1, mtREV24 => 2, cpREV45 => 3);
        if (exists $z_value_of{$FORM{EMPIRICAL_MATRIX}}){
            $selecton_comm .= " -z ".$z_value_of{$FORM{EMPIRICAL_MATRIX}};
        }
    }
    if ($querySeqFoundinMSA eq "yes") {
        $selecton_comm .= " -q \'$query_seq_name_to_run\'";
    }
    if ($FORM{MODEL} eq "M7") { # beta no additional omega1. prob(beta) set to 1
        $selecton_comm .= " -p1 -Fp";
    }
    if ($FORM{MODEL} eq "M8a") { #beta + w = 1
        $selecton_comm .= " -w1 -Fw"; #do not optimize omega (omega set to 1) (M8a)
    }
    if ($FORM{MODEL} eq "M5") {
        $selecton_comm .= " -dg";
    }
    if ($FORM{CATEGORIES} ne "") {
        my $catAdd=" -n ".$FORM{CATEGORIES};
        $selecton_comm .= $catAdd;
    }
    if ($upload_TREE_file ne "NOT_GIVEN") {
        $selecton_comm .= " -u \'$treeUpload\'";
    }
    if ($optimizeBL eq "n"){
        $selecton_comm .= " -bn";
    }
    if ($FORM{GENCODE} != 0) {
        my $genAdd=" -g ".$FORM{GENCODE};
        $selecton_comm .= $genAdd;
    }

    print LOG "\nrun_kaks4site: running $selecton_comm\n";
    print LOG "run_kaks4site: SeqName = ***$query_seq_name_to_run***\n";
    system "cd $WorkingDir; $selecton_comm; chmod ogu+rx *"; # The program can't run from rsh

    #check for user errors in kaks4ite.log
    my $kaksLogFile = $WorkingDir.$log;
    open OUTPUT, ">>$OutHtmlFile";
    unless (open (LOGFILE,"$kaksLogFile")) {
        close (LOGFILE);
        &sys_error_exit("Error in run_kaks4site, $kaksLogFile does not exist");
    }
    while (<LOGFILE>){
        my $line=$_;
        if ($line =~ /\S+/){
            my @userError = split(/\s/,$line);
            if ($userError[0] eq "USER"){
                $line =~ s/USER ERROR://; ## query sequence not found
                my $first_line = $line;
                # the rest of the log is consumed here, so the outer loop ends after this line
                while (<LOGFILE>){
                    $line = $_;
                    $first_line = "
".$first_line."
".$line;
                }
                print OUTPUT "\n

  • Warning: The query sequence name \'$FORM{msa_SEQNAME}\' is not found in the MSA file.
    The calculation continues. The first sequence in MSA is used as a query.

\n";
                close OUTPUT;
                print LOG "\nrun_kaks4site: query sequence not found. Calculation continues with 1st sequence in MSA.\n";
            }
            if (($line =~ /found in the tree file but not found in the sequence file/) || ($line =~ /Error reading tree file/)) { #mismatch between MSA names and tree names
                my $err=$line;
                my $line1 = <LOGFILE>;
                my $line2 = <LOGFILE>;
                $err .= "$line1 "."$line2";
                close (LOGFILE);
                &print_to_output_and_exit("Error in tree file:
$err Please check that all the names in the sequence file are identical to all the names in the tree.", "run_kaks4site: $err");
            }
            elsif ($line =~ /Bad format in tree file/){
                close (LOGFILE);
                &print_to_output_and_exit("Bad format in tree file.
Please correct your tree file according to Selecton accepted format and re-submit your query.", "run_kaks4site: $line");
            }
            elsif ($line =~ /The nucleotide sequences contained the character: (.*)/) {
                my $illegal = $1;
                close (LOGFILE);
                &print_to_output_and_exit("The nucleotide sequences file contained an illegal character: $illegal. Only the following characters are accpted: A,C,G,T,-. Please correct your file and re-submit it to Selecton.","run_kaks4site: $line");
            }
            elsif($line =~ /Unable to read file. It is required that each line is no longer than/){
                close (LOGFILE);
                &print_to_output_and_exit("Selecton does not accept DNA sequences which are longer than ".GENERAL_CONSTANTS::SELECTON_MAX_NUCLEOTIDE." nucleotides.", "run_kaks4site: $line");
            }
        }
    }
    close OUTPUT;
    close (LOGFILE);
}

######################################################################
# creating new files to hold DNASeqNames in DNA file and AMINO file
#
# return_seq_names_to_files($current_file, $new_file, $ref_seq_name_arr)
# Copies $WorkingDir.$current_file to $WorkingDir.$new_file, replacing every
# line that matches />(\d+)/ with ">" followed by the name stored at that
# index of @$ref_seq_name_arr. A header whose number has no stored name is
# copied unchanged, so the output never contains an empty header.
# Failure to open either file is only logged.
sub return_seq_names_to_files{
    my $current_file = shift;
    my $new_file = shift;
    my $ref_seq_name_arr = shift; # reference to the sequence names array

    unless (open IN, $WorkingDir.$current_file){
        print LOG "could not open file $WorkingDir"."$current_file for reading. Names of files will be displayed as numbers.\n" ;
        return;
    }
    unless (open OUT, ">".$WorkingDir .$new_file){
        print LOG "could not open file $WorkingDir"."$new_file for writing. Names of files will be displayed as numbers.\n";
        close IN;
        return;
    }
    while (my $fasta_line = <IN>){
        if ($fasta_line =~ />(\d+)/){
            my $seq_name = $ref_seq_name_arr->[$1];
            if (defined $seq_name && $seq_name ne ""){
                print OUT ">".$seq_name."\n";
                next;
            }
        }
        print OUT $fasta_line;
    }
    close OUT;
    close IN;
}

######################################################################################
# change_tree_file_names($ref_sequences_names, $input_tree, $output_tree)
# Reads the first line of $input_tree, replaces each sequence number in it
# with the matching entry of @$ref_sequences_names, replaces zero branch
# lengths with 0.000000001 and writes the result to $output_tree (chmod 0755).
# If the input cannot be opened the problem is logged and the sub returns;
# if the output cannot be opened sys_error_exit() is called.
# The only call to this sub in the visible part of the file is commented out.
sub change_tree_file_names{
    my $ref_sequences_names = shift;
    my $input_tree = shift;
    my $output_tree = shift;
    my ($tree, $err);

    print LOG "change_tree_file_names : going to change numbers from tree $input_tree to names in file $output_tree\n";
    unless (open TREE, $input_tree){
        print LOG "change_tree_file_names : could not open the file $input_tree for reading. 
the tree file will be presented with numbers\n";
        return;
    }
    #check validity of input tree file
    $tree = <TREE>;
    close TREE;

    my @tree_arr = split(/\(/, $tree);
    my @sub_tree = ();
    my @temp_arr;
    my $sub_counter = 0;

    # building the array @sub_tree, so that each cell will hold maximum one sequence name
    for(my $i=0; $i<@tree_arr; $i++){
        if ($tree_arr[$i] ne ""){
            $tree_arr[$i] = "(".$tree_arr[$i];
        }
        if ($tree_arr[$i] =~ m/.*,.+/){
            @temp_arr = split(/,/, $tree_arr[$i]);
            foreach (@temp_arr){
                $sub_tree[$sub_counter] = $_.",";
                $sub_counter++;
            }
        }
        else{
            $sub_tree[$sub_counter] = $tree_arr[$i];
            $sub_counter++;
        }
    }

    # rebuilding the tree, this time replacing the sequences names with the names found in the DNA input file
    my $final_tree = "";
    my ($exp, $rest_of_exp, $new_rest_exp);
    my $seq_found = "no";
    for (my $k=1; $k<@sub_tree; $k++){
        #in this part we wish to split the expression to 2 parts; left part : (?seq_name ; right part: all the rest
        if ($sub_tree[$k] ne ""){
            if ($sub_tree[$k] =~ m/(.+)(:.+)/){
                $exp = $sub_tree[$k];
                $rest_of_exp = "";
                while ($exp =~ m/(.+)(:.+)/){
                    $exp = $1;
                    $rest_of_exp = $2.$rest_of_exp;
                }
            }
            # in case the expression is of format: seq_name:distance,
            elsif($sub_tree[$k] =~ m/(.+)(\);.+)/){
                $exp = $1;
                $rest_of_exp = $2;
                while ($exp =~ m/(.+)(\))/){
                    $exp = $1;
                    $rest_of_exp = $2.$rest_of_exp;
                }
            }
            # in case the expression is of format: seq_name)*,
            elsif($sub_tree[$k] =~ m/(.+)(\)?.+)/){
                $exp = $1;
                $rest_of_exp = $2;
                while ($exp =~ m/(.+)(\))/){
                    $exp = $1;
                    $rest_of_exp = $2.$rest_of_exp;
                }
            }
            # if the length (value after the ":") is equal to zero, we replace it with a very small value,
            # because the selecton.exe cannot calculate trees with zeros
            $new_rest_exp = "";
            while($rest_of_exp =~ m/(.?:)(\d\.?\d*)(.+)/){
                if(!($2>0) && !($2<0)){
                    $rest_of_exp = $3;
                    $new_rest_exp .= $1."0.000000001";
                }
                else{
                    $rest_of_exp = $3;
                    $new_rest_exp .= $1.$2;
                }
            }
            $new_rest_exp .=$rest_of_exp;
            $rest_of_exp = $new_rest_exp;
            $exp =~ m/(\(?)(.+)/;
            $final_tree.= $1.$ref_sequences_names->[$2].$rest_of_exp;
        }
        #an empty cell stands for a "(" sign
        else{
            $final_tree.= "(";
        }
    }
    if ($final_tree =~ m/,$/){
        chop $final_tree;
    }
    unless (open NEW_TREE, ">".$output_tree){
        &sys_error_exit("change_tree_file_names:: cannot open file $output_tree for writing.");
    }
    print LOG "change_tree_file_names : printing edited tree to file $output_tree and chmod it.\n";
    print NEW_TREE $final_tree;
    close NEW_TREE;
    chmod 0755, $output_tree;
}

######################################################################
# prepare the variables to be sent to the "pipe" script. than calls the script with all needed vars.
#
# prepare_pipe($sequences_names)
# $sequences_names is a reference to the sequence-names array. The form and
# run settings are turned into display strings and passed to
# pipe_for_selecton::create_pipe. If the pipe error file exists and is
# non-empty afterwards, the run ends through sys_error_exit().
sub prepare_pipe{
    my $sequences_names = shift;
    print LOG "\nEentered prepare_pipe()\n";

    # vars that should be edits before sent to the pipe script:
    my ($pipe_pdb_id, $pipe_chain, @pipe_Model, $model_ref, $pipe_distribute_categories, $pipe_optimizeBL, @pipe_genetic, $genetic_ref, $pipe_dna_input, $pipe_query, $pipe_precision, $pipe_empirical);

    # The defaults below were written as "(COND) ? $v = A : $v = B;", which
    # Perl parses as "((COND) ? ($v = A) : $v) = B" - $v always received B.
    # They are now plain "$v = COND ? A : B" so the first alternative is
    # actually used when the condition holds.
    $pipe_pdb_id = ($FORM{pdb_ID} eq "") ? "UPLOADED" : $FORM{pdb_ID};
    $pipe_chain  = ($FORM{chain} eq "")  ? "none"     : $FORM{chain};

    my %model_text_of = (
        "M8"  => 'Positive selection enabled (M8, beta + w >= 1)',
        "M8a" => 'Null model: no positive selection(M8a, beta + w = 1)',
        "M7"  => 'Null model: no positive selection(M7, beta)',
        "M5"  => 'Positive selection enabled(M5, gamma)',
        "MEC" => 'Mechanistic Empirical Combination Model (MEC)',
    );
    $pipe_Model[0] = exists $model_text_of{$FORM{MODEL}} ? $model_text_of{$FORM{MODEL}} : "IGNORED";
    $model_ref = \@pipe_Model;

    $pipe_empirical = ($FORM{MODEL} eq "MEC") ? $FORM{EMPIRICAL_MATRIX} : "IRRELEVANT";

    if ($epsilonPrecision == 0.1) {$pipe_precision = "Intermediate precision";}
    elsif ($epsilonPrecision == 1) {$pipe_precision = "Low precision- faster run";}
    elsif ($epsilonPrecision == 0.01) {$pipe_precision = "High precision- slower run";}
    else {$pipe_precision = "DEFAULT";}

    $pipe_distribute_categories = ($FORM{CATEGORIES} eq "") ? 8 : $FORM{CATEGORIES};
    $pipe_optimizeBL = ($optimizeBL eq "y") ? "True" : "False";

    if ($FORM{GENCODE}==0) {$pipe_genetic[0] = "Nuclear Standard" ;}
    elsif ($FORM{GENCODE}==1) {$pipe_genetic[0] = "Nuclear Blepharisma";}
    elsif ($FORM{GENCODE}==2) {$pipe_genetic[0] = "Nuclear Ciliate";}
    elsif ($FORM{GENCODE}==3) {$pipe_genetic[0] = "Nuclear Euplotid";}
    elsif ($FORM{GENCODE}==4) {$pipe_genetic[0] = "Mitochondria Vertebrate";}
    elsif ($FORM{GENCODE}==5) {$pipe_genetic[0] = "Mitochondria Invertebrate";}
    elsif ($FORM{GENCODE}==6) {$pipe_genetic[0] = "Mitochondria Yeast";}
    elsif ($FORM{GENCODE}==7) {$pipe_genetic[0] = "Mitochondria Ascidian";}
    elsif ($FORM{GENCODE}==8) {$pipe_genetic[0] = "Mitochondria Echinoderm";}
    elsif ($FORM{GENCODE}==9) {$pipe_genetic[0] = "Mitochondria Flatworm";}
    elsif ($FORM{GENCODE}==10) {$pipe_genetic[0] = "Mitochondria Protozoan";}
    else {$pipe_genetic[0] = "IGNORED";}
    $genetic_ref = \@pipe_genetic;

    $pipe_query = ($sequences_names->[$query_seq_name_to_run] eq "") ? "\"\"" : $sequences_names->[$query_seq_name_to_run];

    #since there is only 1 input file, there will be only var sent to the pipe script. In order that the pipe script will know what kind of input it is - a short string is added at the beginning.
    if ( $upload_unaligned_file_dna ne "no"){
        $pipe_dna_input = "SELECTON_UN".$upload_unaligned_file_dna;
    }
    elsif ($upload_MSA_file_dna ne "no"){
        $pipe_dna_input = "SELECTON_MSA".$upload_MSA_file_dna;
    }
    else{
        $pipe_dna_input = "no_dna_input";
    }

    # NOTE(review): this opens the error file for reading, so it does not
    # create it; presumably creation was intended - confirm before changing.
    open ERROR, $WorkingDir.$pipe_error;
    close ERROR;
    chmod 0755, $WorkingDir.$pipe_error;

    # NOTE(review): the log line lists "$pipe_empirical $pipe_precision" last,
    # while the call passes $pipe_precision before $pipe_empirical - confirm
    # the order create_pipe expects.
    print LOG "running pipe_for_selecton::create_pipe with parameters:\n";
    print LOG "$run_name $cgi_log_file $WorkingDir $rsml $FGiJ_pipe_pdb $pdbUpload $pipe_pdb_id $pipe_chain $pipe_dna_input $pipe_query $model_ref $pipe_distribute_categories $pipe_optimizeBL $genetic_ref $pipe_error $pipe_empirical $pipe_precision\n";
    pipe_for_selecton::create_pipe($run_name, $cgi_log_file, $WorkingDir, $rsml, $FGiJ_pipe_pdb, $pdbUpload, $pipe_pdb_id, $pipe_chain, $pipe_dna_input, $pipe_query, $model_ref, $pipe_distribute_categories, $pipe_optimizeBL, $genetic_ref, $pipe_error, $pipe_precision, $pipe_empirical);

    # checking if there was an error and the pipe file was not created properly
    if (-e $WorkingDir.$pipe_error && !(-z $WorkingDir.$pipe_error)){
        unless (open ERROR, $WorkingDir.$pipe_error){
            &sys_error_exit("An error was found when trying to create the pipe file for Selecton.\nThe Error message should be written to file $WorkingDir"."$pipe_error, however this file could not be opened.\n");
        }
        &sys_error_exit("An error was found while trying to create the pipe file : ".<ERROR>);
    }
}

######################################################################
# sys_error_exit($err)
# Appends the system-error and contact texts to the output HTML, writes $err
# to the log, calls send_mail and send_mailSelecton, stops the page reload
# and ends the script.
sub sys_error_exit{
    my $err = shift;

    open OUTPUT, ">>$OutHtmlFile";
    print OUTPUT $SysErrorDef;
    print OUTPUT $ContactDef;
    close OUTPUT;
    print LOG "\n$err\n";
    &send_mail();
    &send_mailSelecton("SYSTEM ERROR\n".$err);
    &stop_reload;
    exit;
}

##########################################################################################
# Stops the reload of the output page
sub stop_reload { sleep 5; open OUTPUT, "<$OutHtmlFile"; flock OUTPUT,2; my @output = ; flock OUTPUT,8; close OUTPUT; open OUTPUT, ">$OutHtmlFile"; flock
OUTPUT,2; foreach my $line (@output){ # we remove the refresh lines and the button which codes for Selecton cancelled job unless ($line =~ /REFRESH/ or $line =~ /NO-CACHE/ or $line =~ /ACTION=\"cgi.+kill/ or $line =~ /VALUE="Cancel Selecton Job"/ or $line =~/TYPE=hidden NAME="Qstat_file"/ or $line =~/TYPE=hidden NAME="selecton_http"/ or $line =~ /TYPE=hidden NAME="run_no"/ or $line =~ /TYPE=hidden NAME="cgi_pid"/ or $line =~ /Estimated run time is:/ or $line =~ /kill_process.cgi/ or $line =~ /