|
| 1 | +#!/usr/bin/env perl |
| 2 | + |
| 3 | +use strict; |
| 4 | +use warnings; |
| 5 | +use DBI; |
| 6 | +use Getopt::Long qw(GetOptions); |
| 7 | +use File::Basename; |
| 8 | +use Try::Tiny; |
| 9 | +use MIME::Lite; |
| 10 | +use JSON; |
| 11 | +use Data::Dumper; |
| 12 | +use Cwd 'abs_path'; |
| 13 | +require(dirname(abs_path($0))."/../lib/Onco.pm"); |
| 14 | + |
| 15 | + |
| 16 | +local $SIG{__WARN__} = sub { |
| 17 | + my $message = shift; |
| 18 | + if ($message =~ /uninitialized/) { |
| 19 | + die "Warning:$message"; |
| 20 | + } |
| 21 | +}; |
| 22 | + |
| 23 | +my $input_dir; |
| 24 | + |
| 25 | +my $usage = <<__EOUSAGE__; |
| 26 | +
|
| 27 | +Usage: |
| 28 | +
|
| 29 | +$0 [options] |
| 30 | +
|
| 31 | +Options: |
| 32 | +
|
| 33 | + -i <string> Input folder |
| 34 | + |
| 35 | +__EOUSAGE__ |
| 36 | + |
| 37 | + |
| 38 | + |
| 39 | +GetOptions ( |
| 40 | + 'i=s' => \$input_dir |
| 41 | +); |
| 42 | + |
| 43 | +if (!$input_dir) { |
| 44 | + die "input folder are missing\n$usage"; |
| 45 | +} |
| 46 | + |
| 47 | +my $script_dir = dirname(__FILE__); |
| 48 | + |
| 49 | +my $dbh = getDBI(); |
| 50 | +my $sid = getDBSID(); |
| 51 | +my $db_type = getDBType(); |
| 52 | + |
| 53 | +my $sample_id = basename($input_dir); |
| 54 | +my $flagstat = "$input_dir/${sample_id}.flagstat.txt"; |
| 55 | +if ( ! -e $flagstat) { |
| 56 | + die("flagstat file not found!\n"); |
| 57 | +} |
| 58 | +my $sth_smp = $dbh->prepare("select patient_id from samples where sample_id='$sample_id'"); |
| 59 | +$sth_smp->execute(); |
| 60 | +my $patient_id; |
| 61 | +if (my ($pid) = $sth_smp->fetchrow_array) { |
| 62 | + $patient_id = $pid; |
| 63 | +} |
| 64 | +$sth_smp->finish; |
| 65 | + |
| 66 | +my $sth_insert = $dbh->prepare("insert into chipseq values(?,?,?,?,?,?,?,?,?)"); |
| 67 | + |
| 68 | + |
| 69 | +open(FLAGSTAT_FILE, "$flagstat") or die "Cannot open file $flagstat"; |
| 70 | +my $mapped = 0; |
| 71 | +my $dup = 0; |
| 72 | +my $paired = 0; |
| 73 | +while (<FLAGSTAT_FILE>) { |
| 74 | + chomp; |
| 75 | + if (/in total/) { |
| 76 | + ($mapped) = $_ =~ /(\d+)\s/; |
| 77 | + } |
| 78 | + if (/duplicates/) { |
| 79 | + ($dup) = $_ =~ /(\d+)\s/; |
| 80 | + } |
| 81 | + if (/properly paired/) { |
| 82 | + ($paired) = $_ =~ /(\d+)\s/; |
| 83 | + } |
| 84 | +} |
| 85 | +my $dup_rate = $dup/$mapped; |
| 86 | +close(FLAGSTAT_FILE); |
| 87 | +my $has_spike_in = "N"; |
| 88 | +my $spike_in_reads = 0; |
| 89 | +my $spike_in_file = "$input_dir/SpikeIn/spike_map_summary"; |
| 90 | +if ( -e $spike_in_file ) { |
| 91 | + open(SPIKEIN_FILE, "$spike_in_file") or die "Cannot open file $spike_in_file"; |
| 92 | + <SPIKEIN_FILE>; |
| 93 | + my $line = <SPIKEIN_FILE>; |
| 94 | + chomp $line; |
| 95 | + my @tokens = split(/\t/, $line); |
| 96 | + $spike_in_reads = $tokens[1]; |
| 97 | + $has_spike_in = "Y"; |
| 98 | +} |
| 99 | +close(SPIKEIN_FILE); |
| 100 | + |
| 101 | +my @peak_files = grep { -f } glob "$input_dir/MACS_Out_*/*nobl.bed"; |
| 102 | +my @peak_counts = (); |
| 103 | +my $super_enchancer = "N"; |
| 104 | + |
| 105 | +foreach my $peak_file(@peak_files) { |
| 106 | + #print("$peak_file\n"); |
| 107 | + my $count = readpipe("wc -l $peak_file | cut -f1 -d' '"); |
| 108 | + chomp $count; |
| 109 | + (my $cutoff) = $peak_file =~ /MACS_Out_(.*?)\//; |
| 110 | + push @peak_counts, "$cutoff:$count"; |
| 111 | +} |
| 112 | + |
| 113 | +my @rose_files = grep { -f } glob "$input_dir/MACS_Out_q_1e-02/ROSE_out_*/*_peaks_SuperStitched.table.txt"; |
| 114 | +if ($#rose_files >= 0) { |
| 115 | + $super_enchancer = "Y"; |
| 116 | +} |
| 117 | + |
| 118 | +my $total_file = "$input_dir/../total_reads.txt"; |
| 119 | +my $total_reads; |
| 120 | +if ( -e $total_file ) { |
| 121 | + open(TOTAL_FILE, "$total_file") or die "Cannot open file $total_file"; |
| 122 | + while (<TOTAL_FILE>) { |
| 123 | + chomp; |
| 124 | + my @fs = split(/\t/); |
| 125 | + if ($#fs == 1) { |
| 126 | + if ($fs[0] eq $sample_id) { |
| 127 | + $total_reads = $fs[1]; |
| 128 | + } |
| 129 | + } |
| 130 | + } |
| 131 | +} |
| 132 | +if ($paired > 0) { |
| 133 | + if (!$total_reads) { |
| 134 | + $total_reads = $total_reads*2; |
| 135 | + } |
| 136 | + $paired = "Y"; |
| 137 | +} else { |
| 138 | + $paired = "N"; |
| 139 | +} |
| 140 | +close(TOTAL_FILE); |
| 141 | +$dbh->do("delete chipseq where sample_id='$sample_id'"); |
| 142 | +$sth_insert->execute($sample_id, $total_reads, $mapped, $dup, $paired, $has_spike_in, $spike_in_reads, join(",", @peak_counts), $super_enchancer); |
| 143 | +print ("$patient_id $sample_id $paired $total_reads $mapped $dup $dup_rate $has_spike_in $spike_in_reads ".join(",", @peak_counts)." $super_enchancer\n"); |
| 144 | +$dbh->commit(); |
| 145 | + |
| 146 | + |
| 147 | +sub sendEmail { |
| 148 | + my ($content, $database_name, $recipient) = @_; |
| 149 | + my $subject = "OncogenomicsDB master file upload status"; |
| 150 | + my $sender = 'oncogenomics@mail.nih.gov'; |
| 151 | + #my $recipient = 'hsien-chao.chou@nih.gov, rajesh.patidar@nih.gov, manoj.tyagi@nih.gov, yujin.lee@nih.gov, wangc@mail.nih.gov'; |
| 152 | +# if ($database_name eq "development") { |
| 153 | +# $recipient = 'vuonghm@mail.nih.gov'; |
| 154 | +# } |
| 155 | + my $mime = MIME::Lite->new( |
| 156 | + 'From' => $sender, |
| 157 | + 'To' => $recipient, |
| 158 | + 'Subject' => $subject, |
| 159 | + 'Type' => 'text/html', |
| 160 | + 'Data' => $content, |
| 161 | + ); |
| 162 | + |
| 163 | + $mime->send(); |
| 164 | +} |
| 165 | + |
| 166 | + |
| 167 | + |
0 commit comments