mirror of
http://43.156.76.180:8026/YuuMJ/EukPhylo.git
synced 2025-12-27 17:20:25 +08:00
1037 lines
36 KiB
Perl
1037 lines
36 KiB
Perl
#!/usr/bin/perl
|
|
|
|
#
|
|
########
|
|
use strict;
|
|
use warnings;
|
|
use File::Spec::Functions qw(rel2abs);
|
|
use File::Basename;
|
|
|
|
########
|
|
# context:
|
|
my $my_name=basename($0);
|
|
my $basedir=rel2abs('./');
|
|
#my $debug=9;
|
|
my $debug=defined($ENV{DEBUG_LEVEL}) ? $ENV{DEBUG_LEVEL} : 0;
|
|
my $t0=time;
|
|
$SIG{TERM} = \&my_sigtrap;
|
|
my $usage="\n\nCOS v2.05\n\nUsage:\n".
|
|
"\n $my_name case_id msa_method seq_type input_fasta_file output_dir [MSA_Status File] [tgz] [msa_program_path] [user guide_tree] [split number]--- [Additional MSA program parameters]\n\n".
|
|
"\tmsa_method= \t'CLW' : ClustalW2\n".
|
|
"\t \t'MFT' : mafft\n".
|
|
"\t \t'PRK' : prank\n".
|
|
"\t \t Append 'h' to do just HoT, e.g.: 'CLWh'\n\n".
|
|
"\tseq_type= \t'aa' | 'nt'\n\n".
|
|
"\tsplit_number=\t'all' | specific split number".
|
|
"\tSuccessful output at: (output_dir)/(case_id)_cos_(msa_method)/ or (output_dir)/(case_id)_cos_(msa_method).tgz\n".
|
|
"\tError output at: (output_dir)_err/(case_id)_cos_(msa_method)_err/ or (output_dir)_err/(case_id)_cos_(msa_method)_err.tgz\n";
|
|
|
|
|
|
# " 'MFT':mafft maximal | 'MAF':mafft partial | 'MA0' mafft minimal\n\n seq_type= 'aa' | 'nt'\n\n".
|
|
|
|
#####################################################
|
|
my %metcmds;
|
|
$metcmds{CLW}='CW2';
|
|
$metcmds{CW2}='CW2';
|
|
$metcmds{CW3}='CW2';
|
|
$metcmds{MFT}='MAF';
|
|
$metcmds{MFM}='MAF';
|
|
$metcmds{PRK}='PRK';
|
|
#####################################################
|
|
#####################################################
|
|
if ($#ARGV < 4) {
|
|
print $usage;
|
|
exit;
|
|
}
|
|
#####################################################
|
|
## inputs constants etc.
|
|
#####################################################
|
|
my ($idstr,$met,$seqtype_str,$input_file,$output_dir,$status_file,$tgz,$msa_program_path,$user_treefile,$user_split_number,@param)=@ARGV;
|
|
$met=~/^(.{3})(.?)/;
|
|
$met=$1;
|
|
my $hot=$2."C";
|
|
my $treefile;
|
|
if (!defined $metcmds{$met}) {
|
|
print "\nERROR: Unknown msa method $met\n$usage";
|
|
exit;
|
|
}
|
|
if (!defined $user_treefile) # user did not gave tree, we will creat it
|
|
{
|
|
$treefile='guide_tree.nwk';
|
|
}
|
|
my $met_init=\&{'met_init_'.$metcmds{$met}};
|
|
my $make_guide_tree=\&{'make_guide_tree_'.$metcmds{$met}};
|
|
my $align_seq=\&{'align_seq_'.$metcmds{$met}};
|
|
my $align_prof=\&{'align_prof_'.$metcmds{$met}};
|
|
|
|
$input_file=rel2abs($input_file);
|
|
if (! -e $input_file) {
|
|
print "\nERROR: File not found: $input_file\n$usage";
|
|
exit;
|
|
}
|
|
|
|
$output_dir=rel2abs($output_dir);
|
|
my $seqtype=($seqtype_str=~/^[nN]/ ? 1 : 0);
|
|
|
|
#FFU: codon model : seqtype 2
|
|
#my $seqtype=0;
|
|
#$seqtype=1 if ($seqtype_str=~/^[nN]/);
|
|
#$seqtype=2 if ($seqtype_str=~/^[cC]/);
|
|
|
|
$tgz= defined($tgz) ? ($tgz=~/^[tT]/ ? 1 : 0) : 0;
|
|
my $param=join(' ',(@param,' '));
|
|
$param=~s/---//;
|
|
my ($metver,$prkver);
|
|
$met_init->();
|
|
my ($prefix)= $input_file=~m%([^/.]*)\.[^/.]*$%;
|
|
my @extstr=('.fasta','.atsaf');
|
|
my @ht=('h','t');
|
|
my @hot=('heads','tails');
|
|
|
|
my $output_prefix=$idstr.'_cos_'.$met;
|
|
my $done_tar_file=$output_dir.'/'.$output_prefix.'.tgz';
|
|
my $err_tar_file=$output_dir.'_err/'.$output_prefix."_err.tgz";
|
|
|
|
my ($cmdstr,$rc);
|
|
|
|
#####################################################
|
|
#-----------------------------------------
|
|
# create directories and files
|
|
#=========================================
|
|
my $msgstr="--- init $my_name pid=$$ ".localtime()."\nmet=$met\ninfile=$input_file\nseqtype=$seqtype\noutdir=$output_dir\ntgz=$tgz\ndebug=$debug\n".
|
|
"basedir=$basedir\noutput_id=$output_prefix\n---\n";
|
|
`mkdir -p $output_prefix 2>&1`;
|
|
if (defined $user_treefile)
|
|
{
|
|
$treefile=$user_treefile;
|
|
system ("cp $treefile $output_prefix");
|
|
$treefile=getFilename($treefile);
|
|
if ($metcmds{$met} eq "MAF")
|
|
{
|
|
prepare_user_tree_mafft("$output_prefix/$treefile");
|
|
}
|
|
}
|
|
chdir($output_prefix);
|
|
|
|
open(LOGFILE,">>$output_prefix.log");
|
|
my $old_fh = select(LOGFILE);$| = 1;select($old_fh); #autoflush
|
|
log_print(0,1,$msgstr);
|
|
open(TIMEFILE,">>times.txt");
|
|
$old_fh = select(TIMEFILE);$| = 1;select($old_fh); #autoflush
|
|
print TIMEFILE $msgstr;
|
|
my_cmd("find . -size 0 -delete");# for resuming aborted runs with null files
|
|
#####################################################
|
|
#
|
|
#
|
|
#-----------------------------------------
|
|
# load input
|
|
#=========================================
|
|
my %hot_seqs;
|
|
fasta2hotseqs($input_file);
|
|
if ($hot_seqs{notu}<2) {
|
|
print "\nERROR: $input_file has less than 2 sequences....\n";
|
|
exit;
|
|
}
|
|
#=========================================
|
|
#-----------------------------------------
|
|
# make Guide Tree
|
|
#-----------------------------------------
|
|
#=========================================
|
|
my $infile='input.fasta';
|
|
#my $treefile='guide_tree.nwk'; # commented out, will be a param getting from the user
|
|
str_print($infile,@{$hot_seqs{fasta}[0]});
|
|
print "TREE_FILE:$treefile\n";#<STDIN>;
|
|
if (-e $treefile) {
|
|
log_print(1,1,"-$treefile exists");
|
|
}
|
|
else {
|
|
log_print(0,1,"-Making guide tree ...\n");
|
|
if ($status_file ne "")
|
|
{
|
|
open (STATUS,">$status_file.0");
|
|
print STATUS "<ul><li>Making guide tree</li></ul>\n";
|
|
close (STATUS);
|
|
}
|
|
$make_guide_tree->($infile,$treefile);
|
|
} #if
|
|
my %subtrees;
|
|
tree2split($treefile);
|
|
#=========================================
|
|
#-----------------------------------------
|
|
# do HoT
|
|
#-----------------------------------------
|
|
#=========================================
|
|
#$treefile="in.dnd"; # *** Insert here bootstrap tree ****
|
|
str_print($treefile,$subtrees{tree});
|
|
my ($outfile,$outfiler,$msar);
|
|
for my $i (0..1) {
|
|
$outfile="hot_".uc($ht[$i]);
|
|
if ($msar=MSA_check2tails($outfile.$extstr[0],1)) {
|
|
log_print(1,1,"-$outfile$extstr[0] exists");
|
|
next;
|
|
}
|
|
if ($i==1 && ($msar=MSA_check2tails($outfile.$extstr[1],1))) {
|
|
log_print(1,1,"-$outfile$extstr[1] exists");
|
|
str_print($outfile.$extstr[0],$msar);
|
|
next;
|
|
}
|
|
$infile='in'.$extstr[$i];
|
|
str_print($infile,@{$hot_seqs{fasta}[$i]});
|
|
log_print(0,1,"-Making $outfile ...\n");
|
|
if ($status_file ne "")
|
|
{
|
|
open (STATUS,">>$status_file.0");
|
|
print STATUS "<ul><li>Making $outfile</li></ul>\n";
|
|
close (STATUS);
|
|
}
|
|
$msar=$align_seq->($infile,$treefile,$outfile.$extstr[$i]);
|
|
str_print($outfile.$extstr[0],$msar) if ($i==1);
|
|
}#for i=0:1
|
|
if ($hot=~/^h/i) {
|
|
cleanup(0);
|
|
exit(0);
|
|
}
|
|
#=========================================
|
|
#-----------------------------------------
|
|
# do CoS
|
|
#-----------------------------------------
|
|
#=========================================
|
|
my (@pfiles,@tfiles,$fasta);
|
|
my @SplitsToCheck; # add to allow only specific split
|
|
if ($user_split_number eq "ALL") {@SplitsToCheck=(0..$subtrees{nbr}-1);} # add to allow only specific split
|
|
else {@SplitsToCheck=($user_split_number);}
|
|
#for my $i (0..$subtrees{nbr}-1) { # splits
|
|
for my $i (@SplitsToCheck) { # splits
|
|
my $isprof= $i<$subtrees{notu} ? 0 : 1 ;
|
|
|
|
#===== check resumed runs
|
|
my $skip=1;
|
|
for my $j (0..$isprof) { #left prof
|
|
for my $j1 (0..1) { #right prof
|
|
for my $k (0..1) { #hot
|
|
$outfile=$subtrees{br}[$i][0]{name}.'_'.$ht[$j].$ht[$j1].uc($ht[$k]);
|
|
if ($msar=MSA_check2tails($outfile.$extstr[0],1)) {
|
|
log_print(1,1,"-$outfile$extstr[0] exists");
|
|
next;
|
|
}
|
|
if ($k==1 && ($msar=MSA_check2tails($outfile.$extstr[1],1))) {
|
|
log_print(1,1,"-$outfile$extstr[1] exists");
|
|
str_print($outfile.$extstr[0],$msar);
|
|
next;
|
|
}
|
|
$skip=0;
|
|
} #for k=0..1 hot
|
|
} #for j1=0..1
|
|
} #for j=0..0/1
|
|
#====================================
|
|
|
|
if ($skip) {
|
|
log_print(0,1,"-Skiping branch ".($i+1)." of $subtrees{nbr} ...\n");
|
|
next;
|
|
}
|
|
print "-Making branch ".($i+1)." of $subtrees{nbr} ...\n";
|
|
if ($status_file ne "")
|
|
{
|
|
open (MSA_STATUS,">$status_file");
|
|
print MSA_STATUS "<ul><li>Making branch ".($i+1)." of $subtrees{nbr}</li></ul>\n";
|
|
close (MSA_STATUS);
|
|
}
|
|
#-----------------------------------------
|
|
# do 2 profiles x hot =4
|
|
for my $j (0..1) { #left-right
|
|
my %prof=%{$subtrees{br}[$i][$j]};
|
|
if ($j==0 && !$isprof) { # single sequence
|
|
$tfiles[$j]='';
|
|
for my $k (0..1) { # HoT single sequence dummy
|
|
$pfiles[$j][$k]="prof$i\_$j";
|
|
str_print($pfiles[$j][$k].$extstr[$k] , (@{$hot_seqs{fasta}[$k]})[@{$prof{otu}}]);
|
|
} #for k=0..1 HoT
|
|
} # if single seq
|
|
else { # notu>1 , profile
|
|
$tfiles[$j]="tree_$i\_$j.dnd";
|
|
str_print($tfiles[$j],$prof{tree});
|
|
for my $k (0..1) { # HoT
|
|
$pfiles[$j][$k]="prof$i\_$j$k";
|
|
$outfile=$pfiles[$j][$k].$extstr[$k];
|
|
$outfiler=$pfiles[$j][$k].$extstr[1-$k];
|
|
if ($msar=MSA_check2tails($outfile,1)) {
|
|
log_print(1,1,"-$outfile exists");
|
|
str_print($outfiler,$msar) if (! -e $outfiler);
|
|
next;
|
|
}
|
|
$infile='in'.$extstr[$k];
|
|
str_print($infile , (@{$hot_seqs{fasta}[$k]})[@{$prof{otu}}]);
|
|
$msar=$align_seq->($infile,$tfiles[$j],$outfile);
|
|
str_print($outfiler,$msar);
|
|
} #for k=0..1 HoT
|
|
} #else prof
|
|
} #for j=0..1 right-left
|
|
#-----------------------------------------
|
|
# do 8 (or 4 for term) hot profile alignment
|
|
$tfiles[2]="tree_$i.dnd";
|
|
str_print($tfiles[2],$subtrees{br}[$i][2]{tree});
|
|
for my $j (0..$isprof) { #left prof
|
|
for my $j1 (0..1) { #right prof
|
|
for my $k (0..1) { #hot
|
|
$outfile=$subtrees{br}[$i][0]{name}.'_'.$ht[$j].$ht[$j1].uc($ht[$k]);
|
|
if ($msar=MSA_check2tails($outfile.$extstr[0],1)) {
|
|
log_print(1,1,"-$outfile$extstr[0] exists");
|
|
next;
|
|
}
|
|
if ($k==1 && ($msar=MSA_check2tails($outfile.$extstr[1],1))) {
|
|
log_print(1,1,"-$outfile$extstr[1] exists");
|
|
str_print($outfile.$extstr[0],$msar);
|
|
next;
|
|
}
|
|
$msar=$align_prof->($pfiles[0][$j].$extstr[$k],$pfiles[1][$j1].$extstr[$k],$tfiles[0],$tfiles[1],$tfiles[2],$outfile.$extstr[$k]);
|
|
str_print($outfile.$extstr[0],$msar) if ($k==1);
|
|
} #for k=0..1 hot
|
|
} #for j1=0..1
|
|
} #for j=0..0/1
|
|
my_cmd("rm -f prof* tr* in* *$extstr[1]") if ($debug<3) ;
|
|
#-----------------------------------------
|
|
} #for i=0..nbr-1 splits
|
|
#-----------------------------------------
|
|
# done CoS
|
|
#=========================================
|
|
cleanup(0);
|
|
exit(0);
|
|
|
|
################################################
|
|
# main end ------------------------------------#
|
|
################################################
|
|
|
|
|
|
#################################################
|
|
##--- mafft functions -------------------------#
|
|
#################################################
|
|
|
|
|
|
sub met_init_MAF {
|
|
if ($seqtype==2) {
|
|
print "ERROR: Codon models not supported by MAFFT, use NT instead.\n";
|
|
exit;
|
|
}
|
|
$metver=$msa_program_path.'-profile';
|
|
my $rc=my_cmd("which $metver 2>&1");
|
|
if ($rc=~/which: no/) {
|
|
print "ERROR: Could not find $metver, please make sure that $metver is on your path and try again.\n";
|
|
exit;
|
|
}
|
|
$metver=$msa_program_path;
|
|
$rc=my_cmd("which $metver 2>&1");
|
|
if ($rc=~/which: no/) {
|
|
print "ERROR: Could not find $metver, please make sure that $metver is on your path and try again.\n";
|
|
exit;
|
|
}
|
|
|
|
# $metver=$metver.' --localpair --maxiterate 1000' if (substr($met,2,1) ne '0');
|
|
# $metver=$metver.' --localpair --maxiterate 1000' if ($met!~/[01]$/);
|
|
|
|
$metver=$metver.' '.$param if (substr($met,2,1) eq 'T');
|
|
$metver=$metver.' --localpair --maxiterate 1000' if (substr($met,2,1) eq 'M');
|
|
|
|
$metver=$metver.($seqtype==0 ? " --amino" : " --nuc");
|
|
$metver=$metver." --quiet" if ($debug<2) ;
|
|
log_print(1,0,"metvar: ".$metver);
|
|
return;
|
|
}#sub met_init_MAF
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub make_guide_tree_MAF {
|
|
my ($infile,$treefile)=@_;
|
|
my $cmdstr="($metver --treeout $infile | tr '[:lower:]' '[:upper:]' | sed 's/>SEQ/>seq/' >hot_H$extstr[0] 2>&1 ) 2>&1 ";
|
|
my $rc=my_cmd($cmdstr);# produces input.fasta.tree
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : mafft error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$treefile ".join(',',@rtime)."\n";
|
|
MSA_check2tails("hot_H$extstr[0]",0);
|
|
#$cmdstr="sed 's/_*//g;s/[0-9]*s/s/g;s/\$/;/' $infile.tree >$treefile;rm $infile.tree"; v2.05: mafft changed treefile format
|
|
#$cmdstr="sed 's/_*//g;s/[0-9]*s/s/g' $infile.tree >$treefile;rm $infile.tree"; #DEBUG
|
|
$cmdstr="sed 's/_*//g;s/[0-9]*s/s/g' $infile.tree >$treefile";
|
|
$rc=my_cmd($cmdstr);# makes guide_tree.nwk
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : mafft guide tree sed error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
return;
|
|
}#sub make_guide_tree_MAF
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub align_seq_MAF {
|
|
my ($infile,$treefile,$outfile)=@_;
|
|
my $cmdstr="($metver --treein $treefile $infile | tr '[:lower:]' '[:upper:]' | sed 's/>SEQ/>seq/' >$outfile 2>&1 ) 2>&1";
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : mafft error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$outfile ".join(',',@rtime)."\n";
|
|
return MSA_check2tails($outfile,0);
|
|
}#sub align_seq_MAF
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub align_prof_MAF {
|
|
my ($pfile1,$pfile2,$tfile1,$tfile2,$tfile3,$ofile)=@_;
|
|
# my $cmdstr = ($tfile1 eq '') ? $pfile1 : "--seed $pfile1 /dev/null" ;# terminal/internal branch
|
|
# $cmdstr="(time -p $metver --treein $tfile3 --seed $pfile2 $cmdstr | sed 's/>_seed_/>/' | tr [:lower:] [:upper:] | sed 's/>SEQ/>seq/' >$ofile 2>&1 )2>&1";
|
|
# switched order on purpose! - to accomodate seed vs. seed and seed vs. seq and mafft trees
|
|
# $cmdstr=~s/--localpair// if (substr($met,2,1) eq 'F'); #if both localpair and treein, mafft USED TO says: "Both structure and user tree have been given. Not yet supported!"
|
|
# $cmdstr="(time -p mafft-profile $pfile2 $pfile1 2>&1 >$ofile )2>&1" if (substr($met,2,1) eq '1');
|
|
# print "$met \n$cmdstr\n";
|
|
# exit;
|
|
$cmdstr="($msa_program_path-profile $pfile2 $pfile1 | tr '[:lower:]' '[:upper:]' | sed 's/>SEQ/>seq/' 2>&1 >$ofile )2>&1";
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : mafft error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$ofile ".join(',',@rtime)."\n";
|
|
return MSA_check2tails($ofile,0);
|
|
}#sub align_prof_MAF
|
|
|
|
################################################
|
|
# end mafft functions ------------------------#
|
|
################################################
|
|
|
|
#################################################
|
|
##--- prank functions -------------------------#
|
|
#################################################
|
|
|
|
|
|
sub met_init_PRK {
|
|
#$metver='prank';
|
|
$metver=$msa_program_path;
|
|
my $rc=my_cmd("which $metver 2>&1");
|
|
if ($rc=~/which: no/) {
|
|
print "ERROR: Could not find $metver, please make sure that $metver is on your path and try again.\n";
|
|
exit;
|
|
}
|
|
$rc=my_cmd("$metver 2>&1");
|
|
# added in ver 2.04: prank interface change
|
|
# $prkver= $rc=~/showtree/ ? 1 : 0 ;
|
|
$prkver=0;
|
|
if ($rc=~/.*showall.*/){$prkver="130410";} # iterate alignment also when tree is provided
|
|
elsif ($rc=~/.*prunedata.*/){$prkver="121218";}# last aln named best
|
|
elsif ($rc=~/.*showanc.*/){$prkver="120626";} # last aln name when tre provided changed (2->1)
|
|
elsif ($rc=~/.*showtree.*/){$prkver="101018";} # no more noxml and notree
|
|
$metver=$metver.' '.$param;
|
|
$metver=$metver." -noxml -nopost" if ($prkver==0);
|
|
$metver=$metver." -quiet" if ($debug<2) ;
|
|
$metver=$metver." -codon" if ($seqtype==2) ;
|
|
log_print(1,0,"metver: ".$metver."\nprank version is ".$prkver);
|
|
print("metver: ".$metver."\nprank version is ".$prkver);
|
|
return;
|
|
}#sub met_init_PRK
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub make_guide_tree_PRK {
|
|
my ($infile,$treefile)=@_;
|
|
my $cmdstr="($metver -d=$infile -o=prank_gdt 2>&1) 2>&1;mv prank_gdt.2.fas hot_H$extstr[0];mv prank_gdt.2.dnd $treefile";
|
|
if ($prkver eq "121218")
|
|
{
|
|
$cmdstr="($metver -d=$infile -o=prank_gdt -showtree 2>&1) 2>&1;mv prank_gdt.best.fas hot_H$extstr[0];mv prank_gdt.best.dnd $treefile";
|
|
}
|
|
elsif ($prkver!=0) {
|
|
$cmdstr="($metver -d=$infile -o=prank_gdt -showtree 2>&1) 2>&1;mv prank_gdt.2.fas hot_H$extstr[0];mv prank_gdt.2.dnd $treefile";
|
|
}
|
|
my $rc=my_cmd($cmdstr);# produces input.fasta.tree
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : prank error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$treefile ".join(',',@rtime)."\n";
|
|
MSA_check2tails("hot_H$extstr[0]",0);
|
|
my_cmd("rm -f prank_*") if ($debug<3) ;
|
|
return;
|
|
}#sub make_guide_tree_PRK
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub align_seq_PRK {
|
|
my ($infile,$treefile,$outfile)=@_;
|
|
my $cmdstr="($metver -d=$infile -t=$treefile -o=prank_$outfile -notree 2>&1) 2>&1;mv prank_$outfile.1.fas $outfile"; # HERE was 2.fas
|
|
if ($prkver >= 121218){
|
|
$cmdstr="($metver -d=$infile -t=$treefile -o=prank_$outfile 2>&1) 2>&1;mv prank_$outfile.best.fas $outfile";
|
|
}
|
|
elsif ($prkver!=0) {
|
|
$cmdstr="($metver -d=$infile -t=$treefile -o=prank_$outfile 2>&1) 2>&1;mv prank_$outfile.2.fas $outfile";
|
|
}
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : prank error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$outfile ".join(',',@rtime)."\n";
|
|
my $rmsa=MSA_check2tails($outfile,0);
|
|
my_cmd("rm -f prank_*") if ($debug<3) ;
|
|
return $rmsa;
|
|
}#sub align_seq_PRK
|
|
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
|
|
sub align_prof_PRK {
|
|
my ($pfile1,$pfile2,$tfile1,$tfile2,$tfile3,$ofile)=@_;
|
|
$cmdstr="sed 's/^>.*\$/& group_a/' $pfile1 >prank_$ofile\_inp;sed 's/^>.*\$/& group_b/' $pfile2 >>prank_$ofile\_inp";
|
|
$rc=my_cmd($cmdstr);# makes prank profile input
|
|
#exit;
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : prank input sed error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my $cmdstr="($metver -partaligned -d=prank_$ofile\_inp -t=$tfile3 -o=prank_$ofile -notree 2>&1) 2>&1;mv prank_$ofile.0.fas $ofile";
|
|
if ($prkver>=121218)
|
|
{
|
|
$cmdstr="($metver -partaligned -d=prank_$ofile\_inp -t=$tfile3 -o=prank_$ofile 2>&1) 2>&1;mv prank_$ofile.fas $ofile";
|
|
}
|
|
elsif ($prkver>0) {
|
|
$cmdstr="($metver -partaligned -d=prank_$ofile\_inp -t=$tfile3 -o=prank_$ofile 2>&1) 2>&1;mv prank_$ofile.0.fas $ofile";
|
|
}
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : prank error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$ofile ".join(',',@rtime)."\n";
|
|
my $rmsa=MSA_check2tails($ofile,0);
|
|
my_cmd("rm -f prank_*") if ($debug<3) ;
|
|
return $rmsa;
|
|
}#sub align_prof_PRK
|
|
|
|
################################################
|
|
# end prank functions ------------------------#
|
|
################################################
|
|
|
|
################################################
|
|
# ClustalW2 functions ------------------------#
|
|
################################################
|
|
|
|
sub met_init_CW2 {
|
|
if ($seqtype==2) {
|
|
print "ERROR: Codon models not supported by CLUSTALW2, use NT instead.\n";
|
|
exit;
|
|
}
|
|
#$metver='clustalw2';
|
|
$metver=$msa_program_path;
|
|
my $rc=my_cmd("which $metver 2>&1");
|
|
if ($rc=~/which: no/) {
|
|
print "ERROR: Could not find $metver, please make sure that $metver is on your path and try again.\n";
|
|
exit;
|
|
}
|
|
$metver=$metver.' '.$param if (substr($met,2,1) eq 'W');
|
|
$metver=$metver.' -iteration=alignment' if (substr($met,2,1) eq '2') ;
|
|
$metver=$metver.' -iteration=tree' if (substr($met,2,1) eq '3') ;
|
|
$metver=$metver.($seqtype==0 ? " -type=PROTEIN" : " -type=DNA");
|
|
$metver=$metver." -quiet" if ($debug<2) ;
|
|
log_print(1,0,"metvar: ".$metver);
|
|
return;
|
|
}#sub met_init_CW2
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
sub make_guide_tree_CW2 {
|
|
my ($infile,$treefile)=@_;
|
|
$cmdstr="($metver -infile=$infile -newtree=$treefile 2>&1) 2>&1";
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : clustalw2 error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$treefile ".join(',',@rtime)."\n";
|
|
return;
|
|
}#sub make_guide_tree_CW2
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
sub align_seq_CW2 {
|
|
my ($infile,$treefile,$outfile)=@_;
|
|
$cmdstr="($metver -infile=$infile -outfile=$outfile -output=fasta -outorder=input -usetree=$treefile 2>&1) 2>&1";
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : clustalw2 error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$outfile ".join(',',@rtime)."\n";
|
|
return MSA_check2tails($outfile,0);
|
|
}#sub align_seq_CW2
|
|
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
|
|
sub align_prof_CW2 {
|
|
my ($pfile1,$pfile2,$tfile1,$tfile2,$tfile3,$ofile)=@_;
|
|
my $cmdstr=($tfile1 eq '') ? '' : "-usetree1=$tfile1";# terminal/internal branch
|
|
$cmdstr="($metver -profile -profile1=$pfile1 -profile2=$pfile2 $cmdstr -usetree2=$tfile2 -output=fasta -outfile=$ofile 2>&1) 2>&1";
|
|
my $rc=my_cmd($cmdstr);
|
|
if ($rc=~/err/i) {
|
|
log_print(0,2,"ERROR: $my_name $$ : clustalw2 error:\n$cmdstr\n---\n$rc\n---\n");
|
|
cleanup(1);
|
|
}
|
|
my @rtime= $rc=~m/\nreal ([0-9\.]+).*\nuser ([0-9\.]+).*\nsys ([0-9\.]+)/;
|
|
print TIMEFILE "$ofile ".join(',',@rtime)."\n";
|
|
return MSA_check2tails($ofile,0);
|
|
}#sub align_prof_CW2
|
|
|
|
|
|
#################################################
|
|
## end clustalw2 functions -------------------#
|
|
#################################################
|
|
|
|
sub MSA_check2tails {
|
|
# Read fasta MSA file and check that sequences match global %hot_seqs
|
|
# Reverse residue order of sequences and return in string
|
|
# MSA_check2tails($input_file,$mode)
|
|
# mode=[1:return tails or shutdown on error, |
|
|
# 0:just check, dont shutdown and return boolean, this is for resuming aborted runs]
|
|
#
|
|
my ($file,$mode)=@_;
|
|
if (! -e $file) {
|
|
return 0 if $mode;
|
|
log_print(0,2,"ERROR: $my_name $$ : File not found: $_[0]");
|
|
cleanup(1);
|
|
}
|
|
open(INFILE,$file);
|
|
local $/='>';
|
|
$_=<INFILE>;
|
|
if ($_ ne '>') {
|
|
close(INFILE);
|
|
return 0 if $mode;
|
|
log_print(0,2,"ERROR: $my_name $$ : File not in fasta format: $_[0]");
|
|
cleanup(1);
|
|
}
|
|
my $sdir= ($file=~m/$extstr[0]$/) ? 0 : 1; #seq dirction from file extention
|
|
my $fastar='';
|
|
while ($_=<INFILE>) {
|
|
my ($name,$seq) = $_=~m/^([^\n\r]*)([^>]*)/s;
|
|
$seq=~s/\s//g;
|
|
my $seqr=reverse($seq);
|
|
$seqr=~s/.{1,60}/$&\n/g;
|
|
$fastar=$fastar.">$name\n$seqr";
|
|
my ($sid)= $name=~m/(\d{4})$/;
|
|
$seq=~s/-//g;
|
|
log_print(6,0,"$name $sdir $sid:\n".$seq."\n ref:\n".$hot_seqs{seqs}[$sdir][$sid]."\n-----\n");
|
|
if ($seq ne $hot_seqs{seqs}[$sdir][$sid]) {
|
|
close(INFILE);
|
|
return 0 if $mode;
|
|
log_print(0,2,"ERROR: $my_name $$ : Sequence mismatch in file: $_[0] $name :\n---\n$seq\nshould be:\n$hot_seqs{seqs}[$sdir][$sid]\n---\n");
|
|
cleanup(1);
|
|
}
|
|
}
|
|
close(INFILE);
|
|
return $fastar;
|
|
}#sub MSA_check2tails
|
|
#################################################
|
|
##----------------------------------------------#
|
|
#################################################
|
|
sub fasta2hotseqs {
|
|
# Read fasta file, write sequence names file, fill sequence structure %hot_seqs
|
|
#
|
|
if (! -e $_[0]) {
|
|
log_print(0,2,"ERROR: $my_name $$ : File not found: $_[0]");
|
|
cleanup(1);
|
|
}
|
|
open(INFILE,$_[0]);
|
|
local $/=">";
|
|
$_=<INFILE>;
|
|
if ($_ ne '>') {
|
|
close(INFILE);
|
|
log_print(0,2,"ERROR: $my_name $$ : File not in fasta format: $_[0]");
|
|
cleanup(1);
|
|
}
|
|
my $names_txt='';
|
|
my $i=0;
|
|
while ($_=<INFILE>) {
|
|
# catch names containing '>'
|
|
while (($_!~/\n>$/) && (my $tmp=<INFILE>)) {
|
|
$_=$_.$tmp;
|
|
}
|
|
my $sn=sprintf('seq%04u',$i);
|
|
push @{$hot_seqs{sid}},$sn;
|
|
my ($name,$seq) = $_=~m/^([^\n\r]*)([^>]*)/s;
|
|
push @{$hot_seqs{ids}},$name;
|
|
$names_txt=$names_txt.$sn.' '.$name."\n";
|
|
$seq=~s/[\s-]//g;
|
|
$seq=uc($seq);
|
|
push @{$hot_seqs{seqs}[0]},$seq;
|
|
my $seq1=$seq;
|
|
$seq1=~s/.{1,60}/$&\n/g;
|
|
push @{$hot_seqs{fasta}[0]},">$sn\n$seq1";
|
|
$seq=reverse($seq);
|
|
push @{$hot_seqs{seqs}[1]},$seq;
|
|
$seq=~s/.{1,60}/$&\n/g;
|
|
push @{$hot_seqs{fasta}[1]},">$sn\n$seq";
|
|
$i++;
|
|
}#while INFILE
|
|
close(INFILE);
|
|
$hot_seqs{notu}=$i;
|
|
open(OUTFILE,">seq_names.txt");
|
|
print OUTFILE $names_txt;
|
|
close(OUTFILE);
|
|
return;
|
|
}#sub fasta2hotseqs
|
|
#################################################
|
|
##----------------------------------------------#
|
|
#################################################
|
|
sub tree2split {
|
|
if (! -e $_[0]) {
|
|
log_print(0,2,"ERROR: $my_name $$ : File not found: $_[0]");
|
|
cleanup(1);
|
|
}
|
|
my ($nwstr,$nbr,$notu,@otu,@otu2,@bid,@len,@br2,%otus);
|
|
open(INFILE,$_[0]);
|
|
# local $/=';'; v2.05: mafft changed treefile format
|
|
local $/;
|
|
$nwstr=<INFILE>;
|
|
close(INFILE);
|
|
$nwstr=~s/;*$/;/g;
|
|
$nwstr=~s/\s//g;
|
|
|
|
$subtrees{tree}=$nwstr;
|
|
chop $nwstr;
|
|
if ($hot_seqs{notu}==2) {
|
|
if ($metcmds{$met} eq "MAF") {
|
|
$nwstr=~m/.*:([.\d]*),.*:([.\d]*).*/;
|
|
$subtrees{tree}="1 2 $1 $2\n";
|
|
}#if MAF
|
|
$subtrees{nbr}=0;
|
|
return;
|
|
}#notu==2
|
|
$nwstr=~tr/()/<>/;
|
|
$nbr=0;
|
|
log_print(1,0,"nwstr=\n$nwstr");
|
|
################## terminal branches
|
|
while ($nwstr=~m/[<,]([^,:<>]*):([^,:<>]*)[>,]/) {
|
|
$br2[$nbr]=[-1,-1];
|
|
$otu[$nbr]=$1;
|
|
$otus{$1}=$nbr;
|
|
$len[$nbr]=$2;
|
|
$bid[$nbr][0]=$nbr;
|
|
$nwstr=~s/$1:$2/$nbr/;
|
|
log_print(6,0,"tbrn nbr=$nbr\nnwstr=\n$nwstr\n---\n");
|
|
$nbr++;
|
|
}
|
|
$notu=$nbr;
|
|
############### internal branches
|
|
while ($nwstr=~m/<(\d*),(\d*)>:([^,:<>]*)/) {
|
|
$br2[$nbr]=[$1,$2];
|
|
$otu[$nbr]=sprintf("<%s:%f,%s:%f>",$otu[$1],$len[$1],$otu[$2],$len[$2]);
|
|
#$otu[$nbr]="<".$otu[$1].":".$len[$1].",".$otu[$2].":".$len[$2].">";
|
|
$len[$nbr]=$3;
|
|
$bid[$nbr]=[sort(@{$bid[$1]},@{$bid[$2]})];
|
|
$nwstr=~s/<$1,$2>:$3/$nbr/;
|
|
log_print(6,0,"ibrn nbr=$nbr\nnwstr=\n$nwstr\n---\n");
|
|
$nbr++;
|
|
}
|
|
############## last 3
|
|
my @z=$nwstr=~/,/g; #check if rooted- ',' / unrooted- ',,'
|
|
if ($#z==0) {#change rooted to unrooted
|
|
$nwstr=~m/<(\d*),(\d*)>/;
|
|
$nbr--;
|
|
my $i= ($1==$nbr) ? $2 : $1 ;
|
|
$len[$i]=$len[$1]+$len[$2];
|
|
$nwstr=~s/$nbr/$br2[$nbr][0],$br2[$nbr][1]/;
|
|
log_print(6,0,"unroot nbr=$nbr\nnwstr=\n$nwstr\n---\n");
|
|
}# if rooted
|
|
$nwstr=~m/<(\d*),(\d*),(\d*)>/;
|
|
$otu2[$1]=sprintf("<%s:%f,%s:%f>",$otu[$2],$len[$2],$otu[$3],$len[$3]);
|
|
$otu2[$2]=sprintf("<%s:%f,%s:%f>",$otu[$3],$len[$3],$otu[$1],$len[$1]);
|
|
$otu2[$3]=sprintf("<%s:%f,%s:%f>",$otu[$1],$len[$1],$otu[$2],$len[$2]);
|
|
# $otu2[$1]="<".$otu[$2].":".$len[$2].",".$otu[$3].":".$len[$3].">";
|
|
# $otu2[$2]="<".$otu[$3].":".$len[$3].",".$otu[$1].":".$len[$1].">";
|
|
# $otu2[$3]="<".$otu[$1].":".$len[$1].",".$otu[$2].":".$len[$2].">";
|
|
############## retrace splits
|
|
for my $i (reverse(0..$nbr-1)) {
|
|
if ($br2[$i][0]>-1) {
|
|
for my $j (0..1) {
|
|
|
|
$otu2[$br2[$i][$j]]=sprintf("<%s:%f,%s:%f>",$otu2[$i],$len[$i],$otu[$br2[$i][1-$j]],$len[$br2[$i][1-$j]]);
|
|
#$otu2[$br2[$i][$j]]="<".$otu2[$i].":".$len[$i].",".$otu[$br2[$i][1-$j]].":".$len[$br2[$i][1-$j]].">";
|
|
}
|
|
}
|
|
}
|
|
############# recode otus
|
|
$subtrees{notu}=$notu;
|
|
$subtrees{otus}=[sort keys %otus];
|
|
$subtrees{nbr}=$nbr;
|
|
$subtrees{len}=[@len];
|
|
my @i2i;
|
|
my $oid=0;
|
|
for my $oids (@{$subtrees{otus}}) {
|
|
$i2i[$otus{$oids}]=$oid;
|
|
$oid++;
|
|
}
|
|
|
|
############ otu ids, complement, and structure fill
|
|
my @splits_txt;
|
|
for my $i (0..$nbr-1) {
|
|
my @a0=@{$bid[$i]};
|
|
for my $j (0..$#a0) {
|
|
$a0[$j]=$i2i[$a0[$j]];
|
|
}
|
|
@a0= sort { $a <=> $b } @a0;
|
|
my @a1=(0..$notu-1);
|
|
for my $j (reverse(@a0)) {
|
|
splice(@a1,$j,1);
|
|
}#for j
|
|
if ($#a0+$#a1+2 != $notu) {
|
|
log_print(0,2,"ERROR: subtrees error:\n ".($#a0+1)." : ".join(",",@a0)."\n ".($#a1+1)." : ".join(",",@a1));
|
|
cleanup(1);
|
|
}#if
|
|
$subtrees{br}[$i][0]{notu}=$#a0+1;
|
|
$subtrees{br}[$i][1]{notu}=$#a1+1;
|
|
$subtrees{br}[$i][2]{notu}=$notu;
|
|
$subtrees{br}[$i][0]{otu}=[@a0];
|
|
$subtrees{br}[$i][1]{otu}=[@a1];
|
|
$subtrees{br}[$i][2]{otu}=[@a1,@a0];# switched order on purpose, for mafft trees
|
|
|
|
my $nwstr0=$otu[$i];
|
|
$nwstr0=~tr/<>/()/;
|
|
$subtrees{br}[$i][0]{tree}=$nwstr0.";";
|
|
$nwstr=$otu2[$i];
|
|
$nwstr=~tr/<>/()/;
|
|
$subtrees{br}[$i][1]{tree}=$nwstr.";";
|
|
if ($metcmds{$met} eq "MAF") {
|
|
$otu2[$3]=sprintf("<%s:%f,%s:%f>",$otu[$1],$len[$1],$otu[$2],$len[$2]);
|
|
$subtrees{br}[$i][2]{tree}=sprintf("(%s:%f,%s:%f);",$nwstr0,($len[$i]/2),$nwstr,($len[$i]/2));
|
|
}
|
|
else {
|
|
$subtrees{br}[$i][2]{tree}=sprintf("(%s:%f,%s;",$nwstr0,$len[$i],substr($nwstr,1));
|
|
}
|
|
$subtrees{br}[$i][0]{name}=sprintf('b%u#%04u',(($i<$notu)?1:0),$i);
|
|
|
|
my $split_disp="$subtrees{br}[$i][0]{name} : $subtrees{br}[$i][0]{notu}/$subtrees{br}[$i][1]{notu} : [".join(",",@{$subtrees{br}[$i][0]{otu}})."]/[".join(",",@{$subtrees{br}[$i][1]{otu}})."]\n";
|
|
push @splits_txt,$split_disp;
|
|
if ($debug>1) {
|
|
$split_disp="---- tree2split:\n".$split_disp." left: $subtrees{br}[$i][0]{tree}\n".
|
|
" right: $subtrees{br}[$i][1]{tree}\n"." joined: $subtrees{br}[$i][2]{tree}\n-------\n";
|
|
log_print(6,0,$split_disp);
|
|
}#if
|
|
}# for i
|
|
open(OUTFILE,">splits.txt");
|
|
print OUTFILE join('',@splits_txt);
|
|
close(OUTFILE);
|
|
$subtrees{br}[$subtrees{nbr}][0]{tree}=$subtrees{tree};
|
|
$subtrees{br}[$subtrees{nbr}][0]{otu}=[0..$subtrees{notu}-1];
|
|
$subtrees{br}[$subtrees{nbr}][0]{notu}=$subtrees{notu};
|
|
$subtrees{br}[$subtrees{nbr}][1]{notu}=0;
|
|
$subtrees{br}[$subtrees{nbr}][2]{notu}=0;
|
|
if ($metcmds{$met} eq "MAF") {
|
|
newick2mafft();
|
|
}#if
|
|
}#sub tree2split
|
|
#################################################
|
|
##----------------------------------------------#
|
|
#################################################
|
|
|
|
sub newick2mafft {
|
|
for my $i (0..$subtrees{nbr}) {
|
|
for my $j (0..2) {
|
|
next if ($subtrees{br}[$i][$j]{notu}<2);
|
|
log_print(6,0,"-br $i,$j : $subtrees{br}[$i][$j]{tree}\n");
|
|
log_print(6,0,"-br $i,$j : ".join(",",@{$subtrees{br}[$i][$j]{otu}})."\n");
|
|
my $tr=$subtrees{br}[$i][$j]{tree};
|
|
$tr=~tr/()/<>/;
|
|
my $k;
|
|
for $k (1..$subtrees{br}[$i][$j]{notu}) {
|
|
$tr=~s/$subtrees{'otus'}[$subtrees{br}[$i][$j]{otu}[$k-1]]/$k/;
|
|
}
|
|
my @mtr;
|
|
while ($tr=~/<(\d+):([\d\.]+),(\d+):([\d\.]+)>/) {
|
|
if ($1<$3) {
|
|
$k=$1;
|
|
push @mtr,sprintf("%5d%5d%11.5f%11.5f",$1,$3,$2,$4);
|
|
}
|
|
else {
|
|
$k=$3;
|
|
push @mtr,sprintf("%5d%5d%11.5f%11.5f",$3,$1,$4,$2);
|
|
}
|
|
$tr=~s/<$1:$2,$3:$4>/$k/;
|
|
}
|
|
log_print(6,0,"---\n$i $j : $subtrees{br}[$i][$j]{notu} $#mtr\n$subtrees{br}[$i][$j]{tree} \n".join(' ; ',@mtr)."\n---\n");
|
|
$subtrees{br}[$i][$j]{tree}=join("\n",@mtr)."\n";
|
|
log_print(6,0,"---\n$subtrees{br}[$i][$j]{tree}---\n");
|
|
}#j
|
|
}#i
|
|
log_print(3,1,"- $subtrees{tree}\n----\n");
|
|
$subtrees{tree}=$subtrees{br}[$subtrees{nbr}][0]{tree};
|
|
log_print(3,1,"-\n$subtrees{tree}----\n");
|
|
# exit;
|
|
return;
|
|
}#sub newick2mafft
|
|
|
|
################################################
|
|
# end of logic functions ---------------------#
|
|
################################################
|
|
|
|
|
|
################################################
|
|
# misc. supporting functions ---------------#
|
|
################################################
|
|
|
|
|
|
|
|
sub str_print {
|
|
my $file=shift(@_);
|
|
open(OUTFILE,">$file");
|
|
print OUTFILE @_;
|
|
close(OUTFILE);
|
|
return;
|
|
}#sub str_print
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
sub log_print { # debug_level, whereto 0:2=LOG|OUT|ERR , msgstr
|
|
my ($level,$to,$msgstr)=@_;
|
|
return if ($level>$debug);
|
|
chomp($msgstr);
|
|
$msgstr=$msgstr."\n";
|
|
my ($package, $filename, $line) = caller;
|
|
$msgstr="@ line $line of $filename\n$msgstr\@---\n"
|
|
if ($msgstr!~m/^-/);
|
|
print LOGFILE $msgstr if (defined(fileno LOGFILE));
|
|
print STDOUT $msgstr if ($to>0);
|
|
print STDERR $msgstr if ($to>1);
|
|
return;
|
|
}#sub log_print
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
sub my_cmd {
|
|
my $cmdstr=$_[0];
|
|
my ($package, $filename, $line) = caller;
|
|
log_print(0,$debug,"---- sh: line $line of $filename\n$cmdstr\n");
|
|
my $rc=`$cmdstr`;
|
|
log_print(1,$debug-1,"---- output:\n$rc\n----\n");
|
|
return $rc;
|
|
}#sub my_cmd
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
sub cleanup { #1st argument : exit state [0:normal ,1:error] 2nd: msgstr
|
|
$SIG{TERM}='IGNORE';
|
|
my $state=$_[0];
|
|
close(LOGFILE);
|
|
print TIMEFILE "Total: ".join(',',times,time-$t0)."\n".localtime()."\n";
|
|
close(TIMEFILE);
|
|
if ($state==0) {
|
|
my_cmd("rm -f prof* tr* in* *$extstr[1] temp* pre") if ($debug<4) ;
|
|
chdir("..");
|
|
my_cmd("mkdir -p $output_dir");
|
|
if ($tgz) {
|
|
my_cmd("tar -czf $done_tar_file $output_prefix; rm -rf $output_prefix");
|
|
log_print(0,1,"---\n $output_prefix $my_name done : dir saved to $done_tar_file\n".localtime()."\n");
|
|
}
|
|
else {
|
|
my_cmd("rm -rf $output_dir/$output_prefix; mv -f $output_prefix $output_dir") if($basedir!~/$output_dir/);
|
|
log_print(0,1,"---\n $output_prefix $my_name done : dir saved to $output_dir/$output_prefix\n".localtime()."\n");
|
|
}
|
|
exit;
|
|
}
|
|
else {
|
|
chdir("..");
|
|
my_cmd("mkdir -p $output_dir\_err");
|
|
if ($tgz) {
|
|
my_cmd("tar -czf $err_tar_file $output_prefix");
|
|
log_print(0,1,"$output_prefix $my_name error : tmp dir saved to $err_tar_file\n".localtime()."\n");
|
|
}
|
|
else {
|
|
my_cmd("cp -rf $output_prefix $output_dir\_err");
|
|
log_print(0,1,"$output_prefix $my_name error : tmp dir saved to $output_dir\_err/$output_prefix\n".localtime()."\n");
|
|
}
|
|
}
|
|
my_cmd("rm -rf $output_prefix") if ($debug<4);
|
|
exit;
|
|
}#sub cleanup
|
|
################################################
|
|
#----------------------------------------------#
|
|
################################################
|
|
sub my_sigtrap { # 1st argument is signal name
|
|
my ($sig) = @_;
|
|
log_print(0,0,"$my_name $$ caught a SIG$sig -- ".(localtime)."\n");
|
|
wait;
|
|
exit(0);
|
|
}# sub my_sigtrap
|
|
|
|
sub prepare_user_tree_mafft {
|
|
my ($tree_file)= @_;
|
|
system ("cp $tree_file $tree_file".".ORIG");
|
|
open (IN,$tree_file) || die "Can't open IN '$tree_file' $!";
|
|
my @in=<IN>;
|
|
close (IN);
|
|
chomp (@in);
|
|
my $tree=join("",@in);
|
|
$tree=~s/;$//;
|
|
$tree=~s/(seq[0-9]{4})/\n$1\n/g;
|
|
open (OUT,">$tree_file") || die "Can't open OUT '$tree_file' $!";
|
|
print OUT "$tree\n";
|
|
close OUT;
|
|
}
|
|
|
|
# Returns the filename without directory
|
|
sub getFilename{
|
|
my $fullFile = pop @_;
|
|
if ($fullFile =~ m/.*[\\\/](.*)$/) {
|
|
return $1;
|
|
} else {return $fullFile}
|
|
|
|
}
|
|
|
|
|
|
####################################################
|
|
####################################################
|
|
### THE END
|
|
####################################################
|
|
####################################################
|
|
# FFU:
|
|
#
|
|
#$metcmds{MUS}='(time -p muscle -in input.fasta -out output.fasta 2>&1) 2>&1';
|
|
#$metcmds{PRC}='(time -p probcons input.fasta 2>&1 >output.fasta) 2>&1';
|
|
#$metcmds{DIA}='(time -p dialign-t '.$ENV{HOME}.'/app/dialign/ input.fasta output.fasta 2>&1) 2>&1';
|
|
#$metcmds{PCM}='(time -p pcma input.fasta 2>&1;clustalw -infile=input.aln -convert -output=fasta -outfile=output.fasta 2>&1) 2>&1';
|
|
#$metcmds{POA}='(time -p poa -read_fasta input.fasta -toupper -clustal input.aln '.$ENV{HOME}.'/app/poa/blosum80.mat 2>&1) 2>&1;clustalw -infile=input.aln -convert -output=fasta -outfile=output.fasta 2>&1';
|
|
#$metcmds{MCO}='(time -p t_coffee input.fasta -special_mode mcoffee -run_name output -output fasta_aln, score_ascii -quiet stdout 2>&1) 2>&1;mv output.fasta_aln output.fasta 2>&1';
|
|
#$metcmds{TCO}='(time -p t_coffee input.fasta -run_name output -output fasta_aln, score_ascii -quiet stdout 2>&1) 2>&1;mv output.fasta_aln output.fasta 2>&1';
|