Quantcast
Channel: Ask the FireCloud Team — GATK-Forum
Viewing all articles
Browse latest Browse all 1147

duplicate workflow submission?

$
0
0

I am submitting a single workflow on a single pair (not a pair set), but it appears that duplicate workflows are running. Please see the attached PDF. I find this confusing: is this the expected behavior?

In case it is helpful, the WDL is below.

=========================================================================

# Splits the tumor BAM, the normal BAM and the interval list into one piece
# per sequence name so that the calling steps can be scattered.
#
# Inputs:
#   tumorBam, normalBam       BAMs that are split with `samtools view`.
#   tumorBamIdx, normalBamIdx Declared but never referenced in the command;
#                             presumably localized so samtools can find the
#                             indexes next to the BAMs -- TODO confirm.
#   refFastaIdx               First column is read as the list of sequence names.
#   mutectIntervals           Interval list; '@' header lines are copied into
#                             every piece, data lines are routed by column 1.
#
# Outputs:
#   tumor_bams, normal_bams, intervals  Globbed per-sequence files.
#   split_indices                       0..N-1, one integer per sequence processed.
task CallSomaticMutations_131_Prepare {
File tumorBam
File tumorBamIdx
File normalBam
File normalBamIdx
File refFastaIdx
File mutectIntervals

command <<<

    mkdir -pv tumor_split
    mkdir -pv normal_split
    mkdir -pv interval_split
    export SPLIT_INDEX=0 ;
    # NOTE(review): the active loop only keeps sequences named 20 or 22;
    # the commented-out line below iterates over every sequence instead.
    #for SEQUENCE in `cut -f1 ${refFastaIdx}` ; 
    for SEQUENCE in `cut -f1 ${refFastaIdx}|grep -Pi '^\s*2[02]\s*$'` ; 
        do 
        CHROM=$SEQUENCE ;
        echo "Now splitting with CHROM=$CHROM ..."

        #split normal bam by chrom (options go before the positional arguments)
        samtools view -b ${normalBam} "$CHROM" > "normal_split/$CHROM.bam" ;
        #split tumor bam by chrom
        samtools view -b ${tumorBam} "$CHROM" > "tumor_split/$CHROM.bam" ;
        #split intervals: header lines first, then the rows of this sequence
        INT_SPLIT_FILE="interval_split/$CHROM.intervals"
        cat ${mutectIntervals} | grep -P '^@' > "$INT_SPLIT_FILE"
        cat ${mutectIntervals} | awk -v CHROM_COMP="$CHROM" '{if($1==CHROM_COMP) print $0}' >> "$INT_SPLIT_FILE"

        #save integers to file for subsequent iteration for indexing
        echo "$SPLIT_INDEX" >> split_indices.dat
        # shell arithmetic, so the image does not need bc
        SPLIT_INDEX=$((SPLIT_INDEX + 1)) ;

    done ;

    # Without this check an empty selection leaves split_indices.dat missing
    # and the task only fails later, while evaluating the output section.
    if [ "$SPLIT_INDEX" -eq 0 ] ; then
        echo "ERROR: no sequence name from the reference index matched; nothing was split" >&2
        exit 1
    fi
    >>>

output  {
    Array[File] tumor_bams=glob("tumor_split/*.bam")
    Array[File] normal_bams=glob("normal_split/*.bam")
    Array[File] intervals=glob("interval_split/*.intervals")
    Array[Int] split_indices=read_lines("split_indices.dat")
    }

runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
    }

}

# Runs MuTect 1.1.6 on one tumor/normal BAM pair restricted to one interval file.
#
# Inputs referenced by the command: tumorBam, normalBam, mutectIntervals,
# refFasta, fracContam, dbSNPVCF, cosmicVCF, downsampleToCoverage, normalPanel.
# refFastaIdx and refFastaDict are declared but not referenced; presumably they
# are localized next to refFasta for the tool to pick up -- TODO confirm.
# readGroupBlackList is declared but not used by this task.
#
# Outputs: the call-stats table plus the power and coverage wiggle files.
task Mutect1Task {

File tumorBam
File normalBam
File mutectIntervals
File refFasta
File refFastaIdx
File refFastaDict
String fracContam
File dbSNPVCF
File cosmicVCF
String downsampleToCoverage
File readGroupBlackList
File normalPanel

command <<<

#index the bams first
samtools index ${tumorBam}
samtools index ${normalBam}

#mutect 1
# JVM options are placed before -jar, matching the documented launcher form
# and the invocation used in MutectFCTask.
java -Xmx4g -jar /usr/local/bin/muTect-1.1.6.jar --analysis_type MuTect \
 -L ${mutectIntervals}  --normal_sample_name NORMAL_SAMPLE -I:normal  ${normalBam}  \
 --tumor_sample_name TUMOR_SAMPLE -I:tumor ${tumorBam}  \
 --reference_sequence ${refFasta} \
 --fraction_contamination ${fracContam}  --dbsnp ${dbSNPVCF} \
 --cosmic ${cosmicVCF} \
 --out MuTect1.call_stats.txt --coverage_file MuTect1.coverage.wig.txt \
 --power_file MuTect1.power.wig.txt --downsample_to_coverage ${downsampleToCoverage} \
 --normal_panel ${normalPanel}

>>>

runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
    }

output {
    File mutect1_cs="MuTect1.call_stats.txt"
    File mutect1_pw="MuTect1.power.wig.txt"
    File mutect1_cw="MuTect1.coverage.wig.txt"
    }

}

# Runs the MuTect2 analysis of GenomeAnalysisTK.jar on one tumor/normal BAM
# pair restricted to one interval file.
#
# Inputs referenced by the command: tumorBam, normalBam, mutectIntervals,
# refFasta, dbSNPVCF, cosmicVCF, normalPanel.
# fracContam, downsampleToCoverage and readGroupBlackList are declared (the
# declaration list mirrors Mutect1Task) but are not used by this command.
# refFastaIdx and refFastaDict are declared but not referenced; presumably they
# are localized next to refFasta for the tool to pick up -- TODO confirm.
#
# Output: the file written via --out, exposed as mutect2_cs.
task Mutect2Task {

File tumorBam
File normalBam
File mutectIntervals
File refFasta
File refFastaIdx
File refFastaDict
String fracContam
File dbSNPVCF
File cosmicVCF
String downsampleToCoverage
File readGroupBlackList
File normalPanel

command <<<

#index the bams first
samtools index ${tumorBam}
samtools index ${normalBam}

#mutect 2
# JVM options are placed before -jar, matching the documented launcher form
# and the invocation used in MutectFCTask.
java -Xmx4g -jar /usr/local/bin/GenomeAnalysisTK.jar --analysis_type MuTect2 \
 -L ${mutectIntervals}  -I:normal  ${normalBam}  \
 -I:tumor ${tumorBam}  \
 --reference_sequence ${refFasta} \
 --dbsnp ${dbSNPVCF} \
 --cosmic ${cosmicVCF} \
 --out MuTect.call_stats.txt \
 --normal_panel ${normalPanel}

>>>

runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"
    }

output {
    File mutect2_cs="MuTect.call_stats.txt"
    }

}

# Force-calling MuTect run (muTect-qscore.jar) on one tumor/normal BAM pair
# restricted to one interval file.
#
# fracContam is declared but not used by this command. refFastaIdx and
# refFastaDict are declared but not referenced; presumably they are localized
# next to refFasta for the tool to pick up -- TODO confirm.
#
# Outputs: the call-stats table plus the power and coverage wiggle files.
task MutectFCTask {

    # alignments and the region to call on
    File tumorBam
    File normalBam
    File mutectIntervals

    # reference bundle
    File refFasta
    File refFastaIdx
    File refFastaDict

    # calling parameters and resources
    String fracContam
    File dbSNPVCF
    File cosmicVCF
    String downsampleToCoverage
    File readGroupBlackList
    File normalPanel

    command <<<

        # both BAMs are indexed before the caller runs
        samtools index ${tumorBam}
        samtools index ${normalBam}

        #mutect force-calling from CallSomaticMutationsForceCalling_45
        java -Xmx4g -jar /usr/local/bin/muTect-qscore.jar \
            --read_group_black_list ${readGroupBlackList} \
            -rf BadCigar \
            --analysis_type MuTect \
            -L ${mutectIntervals} \
            --normal_sample_name NORMAL_SAMPLE \
            -I:normal ${normalBam} \
            --tumor_sample_name TUMOR_SAMPLE \
            -I:tumor ${tumorBam} \
            --reference_sequence ${refFasta} \
            --dbsnp ${dbSNPVCF} \
            --cosmic ${cosmicVCF} \
            --out MuTectFC.call_stats.txt \
            --coverage_file MuTectFC.coverage.wig.txt \
            --power_file MuTectFC.power.wig.txt \
            --enable_extended_output \
            --enable_qscore_output \
            --downsample_to_coverage ${downsampleToCoverage} \
            --normal_panel ${normalPanel}

    >>>

    output {
        File mutectfc_cs="MuTectFC.call_stats.txt"
        File mutectfc_pw="MuTectFC.power.wig.txt"
        File mutectfc_cw="MuTectFC.coverage.wig.txt"
    }

    runtime {
        docker: "broadinstitute/eddiescgimage"
        memory: "24 GB"
        defaultDisks: "local-disk 100 SSD"
    }

}

# Gathers the per-shard MuTect1 and MuTect2 results, converts the merged
# tables to VCF, concatenates both VCFs and annotates the result with Oncotator.
#
# Inputs: one array per scattered output. Only mutect1_cs and mutect2_cs are
# read by the command; the *_pw, *_cw and mutectfc_* arrays are declared but
# not used yet (the VEP step is still a placeholder comment).
#
# Outputs:
#   oncotator_out  Oncotator result written to oncotator.out.
#   mutect1Merged  Merged MuTect1 call stats.
#   mutect2Merged  Merged MuTect2 call stats.
task GatherAndOncotateAndVEP {

Array[File] mutect1_cs
Array[File] mutect1_pw
Array[File] mutect1_cw
Array[File] mutect2_cs
Array[File] mutectfc_cs
Array[File] mutectfc_pw
Array[File] mutectfc_cw

command <<<

    #mutect1 call_stats merging
    # header = first two lines of the first shard; body = every shard with
    # '#' lines and the 'contig' column-header line filtered out
    MUTECT1_CS="MuTect1.call_stats.txt"
    head --lines=2 ${mutect1_cs[0]} > $MUTECT1_CS
    cat ${sep=' ' mutect1_cs} | grep -Pv '#'|grep -Pv '^contig' >> $MUTECT1_CS

    #mutect2 call_stats merging
    # header = '#' lines of the first shard; body = non-'#' lines of every shard
    MUTECT2_CS="MuTect2.call_stats.txt"
    cat ${mutect2_cs[0]} |grep -P '^#' > $MUTECT2_CS ;
    cat ${sep=' ' mutect2_cs} |grep -Pv '^#' >> $MUTECT2_CS ;

    #convert them to VCFs
    MUTECT1_VCF="MuTect1.call_stats.vcf"
    MUTECT2_VCF="MuTect2.call_stats.vcf"
    /usr/local/bin/call_stats_to_superlite_vcf.pl $MUTECT1_CS > $MUTECT1_VCF ;
    /usr/local/bin/call_stats_to_superlite_vcf.pl $MUTECT2_CS > $MUTECT2_VCF ;

    #merge the vcfs from Mutect1 and Mutect2
    # the header is taken from the MuTect1 VCF only
    MUTECT_BOTH_VCF="Mutect1_And_Mutect2.merged.vcf" ; 
    cat $MUTECT1_VCF | grep -P '^#' > $MUTECT_BOTH_VCF
    cat $MUTECT1_VCF $MUTECT2_VCF | grep -Pv '^#' >> $MUTECT_BOTH_VCF

    #Run the merged VCF (from both mutects through Oncotator) 
    # positional arguments: input file, output file, genome build
    mkdir -pv oncotator_empty
    oncotator -i VCF --db-dir `pwd`/oncotator_empty/ -o VCF $MUTECT_BOTH_VCF oncotator.out hg19

    #Obtain a RAW call stats from mutect1 and mutect2 and run it through VEP
    #Run the VCF from the call stats through VEP



    >>>


output {
    # The file name must be just "oncotator.out": "hg19" is a separate
    # command-line argument (the genome build), not part of the file name.
    File oncotator_out="oncotator.out"
    File mutect1Merged="MuTect1.call_stats.txt"
    File mutect2Merged="MuTect2.call_stats.txt"
    }


runtime {
    docker: "broadinstitute/eddiescgimage"
    memory: "24 GB"
    defaultDisks: "local-disk 100 SSD"      
    }

}

# End-to-end calling workflow for one tumor/normal pair:
#   1. split the BAMs and intervals per sequence,
#   2. run the three MuTect variants once per split (scatter),
#   3. gather the per-split results and annotate them.
workflow CallingGroupWorkflow {

    # ---- workflow inputs ----
    File tumorBam
    File tumorBamIdx
    File normalBam
    File normalBamIdx
    File refFastaIdx
    File mutectIntervals
    File refFasta
    File refFastaDict
    String fracContam
    File dbSNPVCF
    File cosmicVCF
    String downsampleToCoverage
    File readGroupBlackList
    File normalPanel

    # ---- step 1: produce per-sequence BAMs, intervals and the index list ----
    call CallSomaticMutations_131_Prepare {
        input:
            tumorBam        = tumorBam,
            tumorBamIdx     = tumorBamIdx,
            normalBam       = normalBam,
            normalBamIdx    = normalBamIdx,
            refFastaIdx     = refFastaIdx,
            mutectIntervals = mutectIntervals
    }

    # ---- step 2: one shard per index emitted by the prepare task ----
    # Each shard runs all three callers; i selects the matching element of
    # every array produced by the prepare task.
    scatter (i in CallSomaticMutations_131_Prepare.split_indices) {

        call Mutect1Task {
            input:
                tumorBam             = CallSomaticMutations_131_Prepare.tumor_bams[i],
                normalBam            = CallSomaticMutations_131_Prepare.normal_bams[i],
                mutectIntervals      = CallSomaticMutations_131_Prepare.intervals[i],
                refFasta             = refFasta,
                refFastaIdx          = refFastaIdx,
                refFastaDict         = refFastaDict,
                fracContam           = fracContam,
                dbSNPVCF             = dbSNPVCF,
                cosmicVCF            = cosmicVCF,
                downsampleToCoverage = downsampleToCoverage,
                readGroupBlackList   = readGroupBlackList,
                normalPanel          = normalPanel
        }

        call Mutect2Task {
            input:
                tumorBam             = CallSomaticMutations_131_Prepare.tumor_bams[i],
                normalBam            = CallSomaticMutations_131_Prepare.normal_bams[i],
                mutectIntervals      = CallSomaticMutations_131_Prepare.intervals[i],
                refFasta             = refFasta,
                refFastaIdx          = refFastaIdx,
                refFastaDict         = refFastaDict,
                fracContam           = fracContam,
                dbSNPVCF             = dbSNPVCF,
                cosmicVCF            = cosmicVCF,
                downsampleToCoverage = downsampleToCoverage,
                readGroupBlackList   = readGroupBlackList,
                normalPanel          = normalPanel
        }

        call MutectFCTask {
            input:
                tumorBam             = CallSomaticMutations_131_Prepare.tumor_bams[i],
                normalBam            = CallSomaticMutations_131_Prepare.normal_bams[i],
                mutectIntervals      = CallSomaticMutations_131_Prepare.intervals[i],
                refFasta             = refFasta,
                refFastaIdx          = refFastaIdx,
                refFastaDict         = refFastaDict,
                fracContam           = fracContam,
                dbSNPVCF             = dbSNPVCF,
                cosmicVCF            = cosmicVCF,
                downsampleToCoverage = downsampleToCoverage,
                readGroupBlackList   = readGroupBlackList,
                normalPanel          = normalPanel
        }
    }

    # ---- step 3: outputs of the scattered calls arrive here as arrays ----
    call GatherAndOncotateAndVEP {
        input:
            mutect1_cs  = Mutect1Task.mutect1_cs,
            mutect1_pw  = Mutect1Task.mutect1_pw,
            mutect1_cw  = Mutect1Task.mutect1_cw,
            mutect2_cs  = Mutect2Task.mutect2_cs,
            mutectfc_cs = MutectFCTask.mutectfc_cs,
            mutectfc_pw = MutectFCTask.mutectfc_pw,
            mutectfc_cw = MutectFCTask.mutectfc_cw
    }

}

Viewing all articles
Browse latest Browse all 1147

Trending Articles