I am submitting a single workflow on a single pair (not a pair set), but it appears that duplicate workflows are running. Please see the attached PDF. I find this confusing: is this the expected behavior?
In case it is helpful, the WDL is below.
=========================================================================
# Splits the tumor BAM, the normal BAM and the MuTect interval list into one
# piece per reference sequence so the callers can be scattered over them.
#
# Inputs:
#   tumorBam / normalBam        - BAMs to split
#   tumorBamIdx / normalBamIdx  - not referenced in the command; presumably
#                                 localized so samtools can do region lookups
#                                 (TODO confirm they land next to their BAMs)
#   refFastaIdx                 - .fai whose first column lists sequence names
#   mutectIntervals             - interval list; lines starting with '@' are
#                                 treated as header and copied to every piece
# Outputs:
#   tumor_bams / normal_bams / intervals - per-sequence files found by glob
#   split_indices                        - 0..N-1, one entry per sequence split
task CallSomaticMutations_131_Prepare {
  File tumorBam
  File tumorBamIdx
  File normalBam
  File normalBamIdx
  File refFastaIdx
  File mutectIntervals

  command <<<
    # Stop at the first failing command. Without this the script's exit status
    # is that of the last statement in the loop (the index increment), so a
    # failed samtools call would go unnoticed and produce truncated BAMs.
    set -e

    mkdir -pv tumor_split
    mkdir -pv normal_split
    mkdir -pv interval_split

    SPLIT_INDEX=0
    # The active loop is restricted to sequences named 20 and 22; the
    # commented-out line below iterates over every sequence in the index.
    #for SEQUENCE in `cut -f1 ${refFastaIdx}` ;
    for SEQUENCE in `cut -f1 "${refFastaIdx}"|grep -Pi '^\s*2[02]\s*$'` ;
    do
      CHROM=$SEQUENCE
      echo "Now splitting with CHROM=$CHROM ..."

      #split normal bam by chrom
      samtools view -b "${normalBam}" "$CHROM" > "normal_split/$CHROM.bam"

      #split tumor bam by chrom
      samtools view -b "${tumorBam}" "$CHROM" > "tumor_split/$CHROM.bam"

      #split intervals: header lines first, then the records for this sequence.
      # awk (unlike grep) exits 0 when nothing matches, so an interval list
      # without '@' header lines does not abort the task under `set -e`.
      INT_SPLIT_FILE="interval_split/$CHROM.intervals"
      awk '/^@/' "${mutectIntervals}" > "$INT_SPLIT_FILE"
      awk -v CHROM_COMP="$CHROM" '$1 == CHROM_COMP' "${mutectIntervals}" >> "$INT_SPLIT_FILE"

      #save integers to file for subsequent iteration for indexing
      echo "$SPLIT_INDEX" >> split_indices.dat
      SPLIT_INDEX=$((SPLIT_INDEX + 1))
    done

    # Fail with a clear message instead of a missing-file error on read_lines.
    if [ ! -s split_indices.dat ]; then
      echo "No sequences in ${refFastaIdx} matched the split filter" >&2
      exit 1
    fi
  >>>

  output {
    # The three globs share the same basenames (<sequence>.bam / .intervals),
    # so element i of each array should refer to the same sequence as long as
    # the engine orders glob results consistently (TODO confirm).
    Array[File] tumor_bams=glob("tumor_split/*.bam")
    Array[File] normal_bams=glob("normal_split/*.bam")
    Array[File] intervals=glob("interval_split/*.intervals")
    Array[Int] split_indices=read_lines("split_indices.dat")
  }

  runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
  }
}
# Indexes one tumor/normal BAM pair and runs MuTect 1.1.6 on it over the
# given intervals, producing call stats plus coverage and power wiggle files.
#
# refFastaIdx, refFastaDict and readGroupBlackList are declared but never
# referenced in the command; the first two are presumably localized so the
# tool can find them beside the FASTA (TODO confirm).
task Mutect1Task {
  # per-shard data
  File tumorBam
  File normalBam
  File mutectIntervals

  # reference resources
  File refFasta
  File refFastaIdx
  File refFastaDict

  # caller parameters and resources
  String fracContam
  File dbSNPVCF
  File cosmicVCF
  String downsampleToCoverage
  File readGroupBlackList
  File normalPanel

  command <<<
    #index the bams first
    samtools index ${tumorBam}
    samtools index ${normalBam}

    #mutect 1
    java -jar -Xmx4g /usr/local/bin/muTect-1.1.6.jar \
      --analysis_type MuTect \
      -L ${mutectIntervals} \
      --normal_sample_name NORMAL_SAMPLE \
      -I:normal ${normalBam} \
      --tumor_sample_name TUMOR_SAMPLE \
      -I:tumor ${tumorBam} \
      --reference_sequence ${refFasta} \
      --fraction_contamination ${fracContam} \
      --dbsnp ${dbSNPVCF} \
      --cosmic ${cosmicVCF} \
      --out MuTect1.call_stats.txt \
      --coverage_file MuTect1.coverage.wig.txt \
      --power_file MuTect1.power.wig.txt \
      --downsample_to_coverage ${downsampleToCoverage} \
      --normal_panel ${normalPanel}
  >>>

  output {
    File mutect1_cs="MuTect1.call_stats.txt"
    File mutect1_pw="MuTect1.power.wig.txt"
    File mutect1_cw="MuTect1.coverage.wig.txt"
  }

  runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
  }
}
# Indexes one tumor/normal BAM pair and runs the MuTect2 analysis from
# GenomeAnalysisTK.jar on it over the given intervals.
#
# fracContam, downsampleToCoverage, readGroupBlackList, refFastaIdx and
# refFastaDict are declared but never referenced in the command; the declared
# input set mirrors Mutect1Task and MutectFCTask.
task Mutect2Task {
  # per-shard data
  File tumorBam
  File normalBam
  File mutectIntervals

  # reference resources
  File refFasta
  File refFastaIdx
  File refFastaDict

  # caller parameters and resources
  String fracContam
  File dbSNPVCF
  File cosmicVCF
  String downsampleToCoverage
  File readGroupBlackList
  File normalPanel

  command <<<
    #index the bams first
    samtools index ${tumorBam}
    samtools index ${normalBam}

    #mutect 2
    java -jar -Xmx4g /usr/local/bin/GenomeAnalysisTK.jar \
      --analysis_type MuTect2 \
      -L ${mutectIntervals} \
      -I:normal ${normalBam} \
      -I:tumor ${tumorBam} \
      --reference_sequence ${refFasta} \
      --dbsnp ${dbSNPVCF} \
      --cosmic ${cosmicVCF} \
      --out MuTect.call_stats.txt \
      --normal_panel ${normalPanel}
  >>>

  output {
    File mutect2_cs="MuTect.call_stats.txt"
  }

  runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
  }
}
# Indexes one tumor/normal BAM pair and runs the force-calling MuTect build
# (muTect-qscore.jar) on it over the given intervals, producing call stats
# plus coverage and power wiggle files.
#
# fracContam, refFastaIdx and refFastaDict are declared but never referenced
# in the command.
task MutectFCTask {
  File tumorBam
  File normalBam
  File mutectIntervals
  File refFasta
  File refFastaIdx
  File refFastaDict
  String fracContam
  File dbSNPVCF
  File cosmicVCF
  String downsampleToCoverage
  File readGroupBlackList
  File normalPanel

  command <<<
    #index the bams first
    samtools index ${tumorBam}
    samtools index ${normalBam}

    #mutect force-calling from CallSomaticMutationsForceCalling_45
    # Every continuation has a space before the backslash. A backslash placed
    # directly after a value glues it to the first token of the next line,
    # e.g. "<refFasta>--dbsnp", when that line has no leading whitespace.
    java -Xmx4g -jar /usr/local/bin/muTect-qscore.jar \
      --read_group_black_list ${readGroupBlackList} \
      -rf BadCigar \
      --analysis_type MuTect \
      -L ${mutectIntervals} \
      --normal_sample_name NORMAL_SAMPLE \
      -I:normal ${normalBam} \
      --tumor_sample_name TUMOR_SAMPLE \
      -I:tumor ${tumorBam} \
      --reference_sequence ${refFasta} \
      --dbsnp ${dbSNPVCF} \
      --cosmic ${cosmicVCF} \
      --out MuTectFC.call_stats.txt \
      --coverage_file MuTectFC.coverage.wig.txt \
      --power_file MuTectFC.power.wig.txt \
      --enable_extended_output \
      --enable_qscore_output \
      --downsample_to_coverage ${downsampleToCoverage} \
      --normal_panel ${normalPanel}
  >>>

  runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
  }

  output {
    File mutectfc_cs="MuTectFC.call_stats.txt"
    File mutectfc_pw="MuTectFC.power.wig.txt"
    File mutectfc_cw="MuTectFC.coverage.wig.txt"
  }
}
# Gathers the per-shard MuTect1 and MuTect2 results into one file per caller,
# converts both to VCF, concatenates the two VCFs and runs Oncotator on the
# merged VCF.
#
# Only mutect1_cs and mutect2_cs are used by the command. The power/coverage
# arrays and the force-calling arrays are accepted but not referenced here.
# The VEP step named in the task title is not implemented yet; only
# placeholder comments exist at the end of the command.
task GatherAndOncotateAndVEP {
  Array[File] mutect1_cs
  Array[File] mutect1_pw
  Array[File] mutect1_cw
  Array[File] mutect2_cs
  Array[File] mutectfc_cs
  Array[File] mutectfc_pw
  Array[File] mutectfc_cw

  command <<<
    #mutect1 call_stats merging
    # Take the two leading lines of the first shard as the header, then append
    # every shard with '#' lines and the 'contig...' column header removed.
    # The '#' filter is anchored so only lines that start with '#' are
    # dropped, matching the MuTect2 filter below.
    MUTECT1_CS="MuTect1.call_stats.txt"
    head --lines=2 ${mutect1_cs[0]} > $MUTECT1_CS
    cat ${sep=' ' mutect1_cs} | grep -Pv '^#' | grep -Pv '^contig' >> $MUTECT1_CS

    #mutect2 call_stats merging
    # Header ('#' lines) from the first shard, then the body of every shard.
    MUTECT2_CS="MuTect2.call_stats.txt"
    grep -P '^#' ${mutect2_cs[0]} > $MUTECT2_CS ;
    cat ${sep=' ' mutect2_cs} | grep -Pv '^#' >> $MUTECT2_CS ;

    #convert them to VCFs
    MUTECT1_VCF="MuTect1.call_stats.vcf"
    MUTECT2_VCF="MuTect2.call_stats.vcf"
    /usr/local/bin/call_stats_to_superlite_vcf.pl $MUTECT1_CS > $MUTECT1_VCF ;
    /usr/local/bin/call_stats_to_superlite_vcf.pl $MUTECT2_CS > $MUTECT2_VCF ;

    #merge the vcfs from Mutect1 and Mutect2
    # Header comes from the MuTect1 VCF only; records from both are appended.
    MUTECT_BOTH_VCF="Mutect1_And_Mutect2.merged.vcf" ;
    grep -P '^#' $MUTECT1_VCF > $MUTECT_BOTH_VCF
    cat $MUTECT1_VCF $MUTECT2_VCF | grep -Pv '^#' >> $MUTECT_BOTH_VCF

    #Run the merged VCF (from both mutects through Oncotator)
    # Positional arguments are <input> <output> <genome build>, so the file
    # written is "oncotator.out" and "hg19" is the build.
    mkdir -pv oncotator_empty
    oncotator -i VCF --db-dir `pwd`/oncotator_empty/ -o VCF $MUTECT_BOTH_VCF oncotator.out hg19

    #Obtain a RAW call stats from mutect1 and mutect2 and run it through VEP
    #Run the VCF from the call stats through VEP
  >>>

  output {
    # Must match the output path given to oncotator above. "hg19" is a
    # separate argument on that command line, not part of the file name.
    File oncotator_out="oncotator.out"
    File mutect1Merged="MuTect1.call_stats.txt"
    File mutect2Merged="MuTect2.call_stats.txt"
  }

  runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
  }
}
# Somatic calling workflow for one tumor/normal pair:
#   1. split the BAMs and interval list per reference sequence,
#   2. scatter MuTect1, MuTect2 and force-calling MuTect over the pieces,
#   3. gather the per-piece results and annotate them.
workflow CallingGroupWorkflow {
  # sample data
  File tumorBam
  File tumorBamIdx
  File normalBam
  File normalBamIdx

  # reference and intervals
  File refFastaIdx
  File mutectIntervals
  File refFasta
  File refFastaDict

  # caller parameters and resources
  String fracContam
  File dbSNPVCF
  File cosmicVCF
  String downsampleToCoverage
  File readGroupBlackList
  File normalPanel

  # PREPARE FOR SCATTER
  call CallSomaticMutations_131_Prepare {
    input:
      tumorBam=tumorBam,
      tumorBamIdx=tumorBamIdx,
      normalBam=normalBam,
      normalBamIdx=normalBamIdx,
      refFastaIdx=refFastaIdx,
      mutectIntervals=mutectIntervals
  }

  #SCATTER AND ANALYZE
  # Each call inside the scatter runs once per entry of split_indices; entry i
  # selects the i-th tumor BAM, normal BAM and interval file from Prepare.
  scatter (i in CallSomaticMutations_131_Prepare.split_indices) {
    call Mutect1Task {
      input:
        refFasta=refFasta,
        refFastaIdx=refFastaIdx,
        refFastaDict=refFastaDict,
        tumorBam=CallSomaticMutations_131_Prepare.tumor_bams[i],
        normalBam=CallSomaticMutations_131_Prepare.normal_bams[i],
        mutectIntervals=CallSomaticMutations_131_Prepare.intervals[i],
        fracContam=fracContam,
        downsampleToCoverage=downsampleToCoverage,
        dbSNPVCF=dbSNPVCF,
        cosmicVCF=cosmicVCF,
        normalPanel=normalPanel,
        readGroupBlackList=readGroupBlackList
    }

    call Mutect2Task {
      input:
        refFasta=refFasta,
        refFastaIdx=refFastaIdx,
        refFastaDict=refFastaDict,
        tumorBam=CallSomaticMutations_131_Prepare.tumor_bams[i],
        normalBam=CallSomaticMutations_131_Prepare.normal_bams[i],
        mutectIntervals=CallSomaticMutations_131_Prepare.intervals[i],
        fracContam=fracContam,
        downsampleToCoverage=downsampleToCoverage,
        dbSNPVCF=dbSNPVCF,
        cosmicVCF=cosmicVCF,
        normalPanel=normalPanel,
        readGroupBlackList=readGroupBlackList
    }

    call MutectFCTask {
      input:
        refFasta=refFasta,
        refFastaIdx=refFastaIdx,
        refFastaDict=refFastaDict,
        tumorBam=CallSomaticMutations_131_Prepare.tumor_bams[i],
        normalBam=CallSomaticMutations_131_Prepare.normal_bams[i],
        mutectIntervals=CallSomaticMutations_131_Prepare.intervals[i],
        fracContam=fracContam,
        downsampleToCoverage=downsampleToCoverage,
        dbSNPVCF=dbSNPVCF,
        cosmicVCF=cosmicVCF,
        normalPanel=normalPanel,
        readGroupBlackList=readGroupBlackList
    }
  }

  # GATHER
  # Outputs of calls made inside the scatter are referenced here as arrays,
  # one element per scatter iteration.
  call GatherAndOncotateAndVEP {
    input:
      mutect1_cs=Mutect1Task.mutect1_cs,
      mutect1_pw=Mutect1Task.mutect1_pw,
      mutect1_cw=Mutect1Task.mutect1_cw,
      mutect2_cs=Mutect2Task.mutect2_cs,
      mutectfc_cs=MutectFCTask.mutectfc_cs,
      mutectfc_pw=MutectFCTask.mutectfc_pw,
      mutectfc_cw=MutectFCTask.mutectfc_cw
  }
}