Skip to content

Commit

Permalink
v0.8.3
Browse files Browse the repository at this point in the history
  • Loading branch information
eunjijunekim committed Jan 23, 2017
1 parent 111ff3e commit 23e0662
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 34 deletions.
6 changes: 5 additions & 1 deletion norm_scripts/restart_failedjobs_only.pl
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,22 @@
die $USAGE;
}
# Parse the optional arguments that follow the four positional ones
# ($ARGV[0] .. $ARGV[3]).
# The only recognized option is "-qlist <comma-separated list>"; its value is
# split on commas into @list. Any other option aborts the script, and so does
# omitting -qlist altogether.
my @list;
my $qopt=0;
for(my $i=4;$i<@ARGV;$i++){
    if ($ARGV[$i] eq '-qlist'){
        # -qlist needs a value after it; without this check the script would
        # read past the end of @ARGV and carry on with an empty @list.
        if ($i+1 >= @ARGV){
            die "option \"-qlist\" requires a value, e.g. -qlist '3G,6G,10G,15G,30G,45G,60G'\n";
        }
        @list = split(",", $ARGV[$i+1]);
        $i++;    # skip over the value that was just consumed
        $qopt++;
    }
    else{
        die "option \"$ARGV[$i]\" not recognized\n";
    }
}

# -qlist is mandatory.
if ($qopt == 0){
    die "Please provide -qlist '3G,6G,10G,15G,30G,45G,60G'\n";
}
my $dirs = $ARGV[0];
my $LOC = $ARGV[1];
my $errname = $ARGV[2];
Expand Down
157 changes: 124 additions & 33 deletions norm_scripts/runall_normalization.pl
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,17 @@
$cutoff_le = $cutoff_temp;
}

# When $new_norm is "true", look for "-alt_out <dir>" in @ARGV: point $normdir
# at <dir>, build the matching "-alt_stats" option string, and create
# <dir>/STATS/ if it does not exist yet.
# NOTE(review): the presence of a value after -alt_out is not checked here --
# presumably validated earlier in the script; confirm.
if ($new_norm eq "true"){
    for(my $i=0; $i<@ARGV; $i++) {
        if ($ARGV[$i] eq "-alt_out"){
            $normdir = $ARGV[$i+1];
            $altstats = "-alt_stats $normdir/STATS/";
            unless (-d "$normdir/STATS/"){
                # List-form system() hands the path to mkdir as a single
                # argument without going through a shell, so spaces or shell
                # metacharacters in the command-line path cannot split or
                # alter the command, and mkdir sees the same path string
                # that the -d test above checked.
                system("mkdir", "-p", "$normdir/STATS/");
            }
        }
    }
}
#check for white spaces
my $to_trim = "false";
open(DIRS, $sample_dir);
Expand Down Expand Up @@ -596,7 +607,18 @@
}
open(LOG, ">>$logfile");
print LOG "\nPORT v0.8.3-beta\n";
print LOG "\n*************\n$input\n*************\n";
# Build a cleaned-up copy of $input for the log; $input itself is not modified.
my $default_input = $input;
#$default_input = `cat $shdir/runall_normalization.sh`;
# Remove every "perl " and rename the script to "run_normalization".
$default_input =~ s/perl\ //g;
$default_input =~ s/runall_normalization.pl/run_normalization/g;
# Remove the first occurrence of each of the following option strings.
# "-fa"/"-fq" are only removed when directly followed by a newline,
# "-sam"/"-bam" only when directly followed by a space.
$default_input =~ s/\-fa\n//;
$default_input =~ s/\-fq\n//;
$default_input =~ s/\-sam //;
$default_input =~ s/\-bam //;
# NOTE(review): these two patterns are not anchored, so "-gz" / "-se" is also
# removed when it first appears inside a longer word or a path -- confirm that
# this cannot happen in $input.
$default_input =~ s/\-gz//;
$default_input =~ s/\-se//;
# Write the cleaned-up string to the log between two separator lines.
print LOG "\n*************\n$default_input\n*************\n";

if (-e "$logdir/$study.runall_normalization.out"){
`rm $logdir/$study.runall_normalization.out`;

Expand Down Expand Up @@ -629,22 +651,90 @@
my @b = split(" ", $get_name);
$name = $b[@b-1];
}
# if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), go back one step and start from
# runall_sam2genes (or runall_sam2genes_2)
# if resumed at runall_cat_genes_files (or runall_cat_genes_files_norm step), unless temp files exist,
# go back one step and start from runall_sam2genes (or runall_sam2genes_2)
my $cat_flag = 0;
if ($name =~ /runall_cat_genes_files$/){
$cat_flag++;
my $tempname = $name;
$tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/;
$name = $tempname;

my $TEflag = 0;
my $err_name = "cat_genes.0.*.err";
my $new_queue = "-mem $queue_3G";
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
my $rtmp = `wc -l $resume_file`;
my ($res_cnt, $res_n) = split(" ", $rtmp);
open(IN, $resume_file);
while(my $line = <IN>){
chomp($line);
my @tcnt = glob("$LOC/$line/GNORM/*/*temp*");
if (@tcnt > 0){
$TEflag++;
}
}
close(IN);
# print "$TEflag\t$res_cnt\n";
if ($TEflag ne $res_cnt){ #temp files don't exist
$cat_flag = 1;
my $tempname = $name;
$tempname =~ s/runall_cat_genes_files$/runall_sam2genes_gnorm/;
$name = $tempname;
}
}
if ($name =~ /runall_cat_genes_files_norm$/){
$cat_flag++;
my $tempname = $name;
$tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/;
$name = $tempname;
my $TEflag = 0;
my $err_name = "cat_genes.1.*.err";
my $new_queue = "-mem $queue_3G";
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
my $rtmp = `wc -l $resume_file`;
my ($res_cnt, $res_n) =split(" ", $rtmp);
open(IN, $resume_file);
while(my $line = <IN>){
chomp($line);
my @tcnt;
if ($STRANDED =~ /TRUE/i){
@tcnt = glob("$normdir/GENE/FINAL_SAM/*sense/$line.*temp*");
}
else{
@tcnt = glob("$normdir/GENE/FINAL_SAM/$line.*temp*");
}
if (@tcnt > 0){
$TEflag++;
}
}
close(IN);
# print "$TEflag\t$res_cnt\n";
if ($TEflag ne $res_cnt){ #temp files don't exist
$cat_flag = 1;
my $tempname = $name;
$tempname =~ s/runall_cat_genes_files_norm$/runall_sam2genes_gnorm_2/;
$name = $tempname;
}
}
# if resumed at runall_parseblastout, unless blastdb files exist,
# go back one step and start from runall_runblast
# $blast_flag stays 0 unless the step name is rewritten below.
my $blast_flag = 0;
if ($name =~ /runall_parseblastout$/){
# number of $resume_file entries that have at least one blastdb* file
my $BDBflag = 0;
my $err_name = "parseblastout.*.err";
my $new_queue = "-mem $queue_3G";
# Run the helper script; its output is captured in $res but not used in this block.
# NOTE(review): presumably this call is what (re)writes $resume_file -- confirm.
my $res = `perl $norm_script_dir/restart_failedjobs_only.pl $sample_dir $LOC \"$err_name\" \"$new_queue\" -qlist \"$qlist\"`;
# First field of the "wc -l" output is the line count of $resume_file.
my $rtmp = `wc -l $resume_file`;
my ($res_cnt, $res_n) =split(" ", $rtmp);
# NOTE(review): the result of open() is not checked.
open(IN, $resume_file);
while(my $line = <IN>){
chomp($line);
# Each line is used as a directory name under $LOC.
my @tcnt = glob("$LOC/$line/blastdb*");
if (@tcnt > 0){
$BDBflag++;
}
}
close(IN);
# print "$BDBflag\t$res_cnt\n";
# The two counts are compared as strings; any difference means at least one
# entry has no blastdb* file.
if ($BDBflag ne $res_cnt){ #database files don't exist
$blast_flag = 1;
# Rewrite the step name so the resume point becomes runall_runblast.
my $tempname = $name;
$tempname =~ s/runall_parseblastout$/runall_runblast/;
$name = $tempname;
}
}

my @a = split(/\./, $name);
$name_to_check = $a[@a-1];
my $get_num = $last_step;
Expand All @@ -655,16 +745,21 @@
print LOG "\nJob number not provided. Setting it to 1.\n";
}
else{
if ($cat_flag == 1){
if (($cat_flag == 1) || ($blast_flag ==1)){
$res_num--;
if ($name =~ /_2$/){
print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n";
}
else{
print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n";
}
}
}
if ($cat_flag == 1){
if ($name =~ /_2$/){
print LOG "\nCannot resume at runall_cat_genes_files_norm.\nResuming at the previous step...\n";
}
else{
print LOG "\nCannot resume at runall_cat_genes_files.\nResuming at the previous step...\n";
}
}
if ($blast_flag == 1){
print LOG "Cannot resume at runall_parseblastout.\nResuming at the previous step...\n";
}
$length = length($res_num) + length($name) + 3;
print LOG "\nRESUME at $res_num \"$name\"\n==========";
for (my $i=0; $i < $length; $i++){
Expand All @@ -673,6 +768,7 @@
print LOG "\n";
$run_job = "false";
}

if ($run_prepause eq "true"){
$job_num = 1;
if ($run_job eq "true"){
Expand Down Expand Up @@ -923,6 +1019,7 @@
$c_option = "$submit \\\"$batchjobs,$jobname, $request, $queue_6G, $stat\\\"";
}
$new_queue = "-mem $queue_6G";

while(qx{$stat | wc -l} > $maxjobs){
sleep(10);
}
Expand Down Expand Up @@ -1041,7 +1138,7 @@
chomp($numr);
my @xnumr = split(" " , $numr);
my $maxribo = $xnumr[0];
$maxribo =~ s/\,//;
$maxribo =~ s/\,//g;
if ($maxribo > 10000000){
$new_queue = "-mem $queue_6G";
if ($maxribo > 20000000){
Expand Down Expand Up @@ -1750,7 +1847,7 @@
chomp($numr);
my @xnumr = split(" " , $numr);
my $maxribo = $xnumr[0];
$maxribo =~ s/\,//;
$maxribo =~ s/\,//g;
if ($maxribo > 10000000){
$new_queue = "-mem $queue_6G";
if ($maxribo > 20000000){
Expand Down Expand Up @@ -2598,7 +2695,7 @@
print LOG "Check \"$study_dir/STATS/EXON_INTRON_JUNCTION/percent_high_expresser_*.txt\" \nUse \"-cutoff_highexp <n>\" option to set/change the highexpresser cutoff value.\n(You may use -cutoff_highexp 100 to unfilter/keep the highexpressers.)\n\n";
}
}

=comment
$default_input = `cat $shdir/runall_normalization.sh`;
$default_input =~ s/perl\ //g;
$default_input =~ s/runall_normalization.pl/run_normalization/g;
Expand All @@ -2610,6 +2707,7 @@
$default_input =~ s/\-se//;
$default_input =~ s/\'-resume_at'\ .+\ //;
$default_input =~ s/\-resume//;
=cut
print LOG "*************\nUse \"-part2\" option to continue:\n(do not change options other than the ones listed above)\n";
#print LOG "e.g. $default_input -part2\n*************\n";
}
Expand All @@ -2619,14 +2717,6 @@
print LOG "\nERROR: \"$study.$name_to_check\" step is not in [PART1].\n\tCannot resume at \"$study.$name_to_check\" step. Please check your pipeline option and -resume_at \"<step>\" option.\n\n";
}
}
if ($new_norm eq "true"){
for(my $i=0; $i<@ARGV; $i++) {
if ($ARGV[$i] eq "-alt_out"){
$normdir = $ARGV[$i+1];
$altstats = "-alt_stats $normdir/STATS/";
}
}
}
if ($run_norm eq "true"){
if ($run_prepause eq "false"){
$job_num = 1;
Expand Down Expand Up @@ -3395,7 +3485,7 @@

my $mem_quants = $mem;
if ($num_samples > 200){
$mem_quants = "$request$queue_6G";
$mem_quants = "$request$queue_10G";
}
$job = "echo \"perl $norm_script_dir/quants2spreadsheet_min_max.pl $sample_dir $LOC genequants $filter_highexp $data_stranded -normdir $normdir\" | $batchjobs $mem_quants $jobname \"$study.quants2spreadsheet_gnorm\" -o $logdir/$study.quants2spreadsheet_gnorm.out -e $logdir/$study.quants2spreadsheet_gnorm.err";

Expand Down Expand Up @@ -3689,6 +3779,7 @@
&check_err ($name_of_job, $err_name, $job_num);
$job_num++;
}
=comment
#exon2nonexon
$name_of_job = "$study.get_exon2nonexon_stats_p2";
if (($resume eq "true")&&($run_job eq "false")){
Expand Down Expand Up @@ -3765,7 +3856,7 @@
$job_num++;
}
}

=cut
#predict_num_reads EIJ p2
$name_of_job = "$study.predict_num_reads_p2";
if (($resume eq "true")&&($run_job eq "false")){
Expand Down

0 comments on commit 23e0662

Please sign in to comment.