Quantcast
Channel: Ask the FireCloud Team — GATK-Forum
Viewing all articles
Browse latest Browse all 1147

Unable to scatter-gather over entities in FireCloud

$
0
0

Hello,

I'm working on a pipeline to analyze methylation data. It is configured to scatter over a set of participants and then gather the per-participant results into a single result file for the participant set. This works when I run it locally, treating the participants as an array of maps, but when I upload the WDL to FireCloud it fails on submission with the error "methpipeagg.samples - Attribute expression returned a list of entities." Would it be possible to treat participants/samples as map-like objects whose attributes can be accessed by name?

WDL:

# Runs `bsmap` on the two FASTQ inputs against `genome` and exposes the
# resulting BAM, which is named after `sample`.
#
# Inputs:
#   fastq1, fastq2 - files passed to bsmap as -a and -b
#   genome         - file passed to bsmap as -d
#   sample         - prefix used for the output file name
# Output:
#   raw_bs_bam     - "./<sample>_raw_bs.bam"
#
# NOTE(review): `-p 4` presumably asks bsmap for 4 processors, but the runtime
# section does not request a matching `cpu` value - confirm this is intended.
task bsmap {
  File fastq1
  File fastq2
  File genome
  String sample

  command {
    bsmap \
      -a ${fastq1} -b ${fastq2} \
      -d ${genome} \
      -p 4 -v 0.05 -s 16 -r 0 -u -S 1 -R \
      -o ${sample}_raw_bs.bam
  }

  runtime {
    docker: "adunford/methy:9"
    #memory: "16 GB"
    #defaultDisks: "local-disk 100 SSD"
  }

  output {
    File raw_bs_bam = "./${sample}_raw_bs.bam"
  }
}
# Sorts `raw_bs_bam` with samtools and, if the sort succeeds, indexes the
# sorted BAM.
#
# Inputs:
#   raw_bs_bam - BAM file to sort
#   sample_id  - prefix used for the sorted output name
# Output:
#   sorted_bs_bam - "<sample_id>_bs.sorted.bam"
#
# NOTE(review): `samtools sort <in.bam> <out.prefix>` looks like the legacy
# positional form; confirm the samtools version shipped in the image accepts it.
task samtools_sort {
  File raw_bs_bam
  String sample_id

  command {
    samtools sort ${raw_bs_bam} ${sample_id}_bs.sorted \
      && samtools index ${sample_id}_bs.sorted.bam
  }

  runtime {
    docker: "adunford/methy:9"
    #memory: "16 GB"
    #defaultDisks: "local-disk 100 SSD"
  }

  output {
    File sorted_bs_bam = "${sample_id}_bs.sorted.bam"
  }
}

# Writes a one-line metrics file for a BAM: the sample id, the line count of
# `samtools view`, and the line count of `samtools view -F 4`.
#
# Inputs:
#   sorted_bs_bam - BAM file to count records in
#   sample_id     - first field of the metrics line and output file prefix
# Output:
#   read_metrics  - "<sample_id>.read_metrics.txt"
task samtools_read_metrics {
  File sorted_bs_bam
  String sample_id

  command {
    echo ${sample_id} \
      $(samtools view ${sorted_bs_bam} | wc -l) \
      $(samtools view -F 4 ${sorted_bs_bam} | wc -l) \
      > ${sample_id}.read_metrics.txt
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File read_metrics = "${sample_id}.read_metrics.txt"
  }
}

# Runs `MethylDackel extract` and then removes every line beginning with
# "track" from the resulting CpG bedGraph, rewriting the file in place via a
# temporary file named `tmp`.
#
# Inputs:
#   genome        - reference passed as the first positional argument
#   sorted_bs_bam - BAM passed as the second positional argument
#   sample_id     - output prefix given to -o
# Output:
#   bed           - "<sample_id>_CpG.bedGraph"
task MethylDackel {
  File genome
  File sorted_bs_bam
  String sample_id

  command {
    MethylDackel extract ${genome} ${sorted_bs_bam} -o ${sample_id}
    grep -v '^track' ${sample_id}_CpG.bedGraph > tmp
    mv tmp ${sample_id}_CpG.bedGraph
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File bed = "${sample_id}_CpG.bedGraph"
  }
}

# Runs `MethylDackel extract --CHH` and exposes the CHH bedGraph it produces.
#
# Inputs:
#   genome        - reference passed as the first positional argument
#   sorted_bs_bam - BAM passed as the second positional argument
#   sample_id     - output prefix given to -o
# Output:
#   chh_bed       - "<sample_id>_CHH.bedGraph"
task MethylDackel_CHH {
  File genome
  File sorted_bs_bam
  String sample_id

  command {
    MethylDackel extract --CHH \
      ${genome} ${sorted_bs_bam} \
      -o ${sample_id}
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File chh_bed = "${sample_id}_CHH.bedGraph"
  }
}

# Invokes the image's collect_bsconv_metrics.sh script with the sample id and
# the CHH bedGraph, and exposes the text file expected afterwards.
#
# Inputs:
#   chh_bed   - bedGraph passed as the script's second argument
#   sample_id - passed as the script's first argument; also the output prefix
# Output:
#   bsconv    - "<sample_id>_bsconv.txt"
#               (presumably written by the script itself - verify)
task bs_conversion_rate {
  File chh_bed
  String sample_id

  command {
    sh /executable_files/collect_bsconv_metrics.sh \
      ${sample_id} \
      ${chh_bed}
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File bsconv = "${sample_id}_bsconv.txt"
  }
}

# Calls create_rda_wrapper.R to build a per-sample .rda file from a bedGraph,
# a bisulfite-conversion metrics file, and a read-metrics file.
#
# Inputs:
#   bed          - file passed via -f
#   sample_id    - prefix of the .rda file requested via -o
#   bsconv       - file passed via -b
#   read_metrics - file passed via -r
# Output:
#   rda          - "<sample_id>.rda"
task create_rda {
  File bed
  String sample_id
  File bsconv
  File read_metrics

  command {
    Rscript /Rscripts/create_rda_wrapper.R \
      -f ${bed} \
      -o ${sample_id}.rda \
      -b ${bsconv} \
      -r ${read_metrics}
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File rda = "${sample_id}.rda"
  }
}

# Calls combine_rda_wrapper.R with a comma-separated list of per-sample .rda
# files and a set name.
#
# Inputs:
#   individual_rdas - files joined with ',' and passed via --rdaFiles
#   set_name        - value passed via --outFile
# Output:
#   combined_rda    - "<set_name>.combined.rda"
#
# NOTE(review): the script only receives `set_name`; presumably it appends
# ".combined.rda" itself - verify against combine_rda_wrapper.R.
task combine_rda {
  Array[File] individual_rdas
  String set_name

  command {
    Rscript /Rscripts/combine_rda_wrapper.R \
      --rdaFiles ${sep=',' individual_rdas} \
      --outFile ${set_name}
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File combined_rda = "${set_name}.combined.rda"
  }
}

# Calls generate_qc_report.R on the combined .rda, using the shell's current
# working directory as the output location, and exposes qcReport.html.
#
# Input:
#   combined_rda - file passed via -f
# Output:
#   qc_report    - "qcReport.html"
#
# The commented-out declarations below are inputs that are currently disabled.
task qc_report {
  File combined_rda
  #String organism
  #String outdir
  #File genome

  command {
    Rscript /Rscripts/generate_qc_report.R \
      -f ${combined_rda} \
      -o $PWD
  }

  runtime {
    docker: "adunford/methy:9"
  }

  output {
    File qc_report = "qcReport.html"
  }
}

# Per-sample processing scattered over `samples`, followed by a gather step
# that combines the per-sample .rda files and builds a QC report.
#
# Inputs:
#   genome   - reference file handed to bsmap, MethylDackel and MethylDackel_CHH
#   set_name - name handed to combine_rda
#   samples  - one map per sample; each map is read with the keys
#              "participant_id", "bs_fastq1" and "bs_fastq2"
workflow methpipeagg {
  File genome
  String set_name
  Array[Map[String,String]] samples

  scatter (sample in samples) {
    call bsmap {
      input:
        sample = sample["participant_id"],
        fastq1 = sample["bs_fastq1"],
        fastq2 = sample["bs_fastq2"],
        genome = genome
    }

    call samtools_sort {
      input:
        raw_bs_bam = bsmap.raw_bs_bam,
        sample_id = sample["participant_id"]
    }

    call samtools_read_metrics {
      input:
        sorted_bs_bam = samtools_sort.sorted_bs_bam,
        sample_id = sample["participant_id"]
    }

    call MethylDackel {
      input:
        sorted_bs_bam = samtools_sort.sorted_bs_bam,
        sample_id = sample["participant_id"],
        genome = genome
    }

    call MethylDackel_CHH {
      input:
        sorted_bs_bam = samtools_sort.sorted_bs_bam,
        sample_id = sample["participant_id"],
        genome = genome
    }

    call bs_conversion_rate {
      input:
        chh_bed = MethylDackel_CHH.chh_bed,
        sample_id = sample["participant_id"]
    }

    call create_rda {
      input:
        bed = MethylDackel.bed,
        sample_id = sample["participant_id"],
        bsconv = bs_conversion_rate.bsconv,
        read_metrics = samtools_read_metrics.read_metrics
    }
  }

  # Gather: create_rda.rda is used here as the array of per-sample outputs.
  call combine_rda {
    input:
      individual_rdas = create_rda.rda,
      set_name = set_name
  }

  call qc_report {
    input:
      combined_rda = combine_rda.combined_rda
  }
}

And the inputs JSON that works when run in a local environment:

{
    "methpipeagg.set_name" : "Mus_musculus",
    "methpipeagg.genome" : "/home/adunford/Mus_musculus/Mus_musculus_assembly10.fasta",
    "methpipeagg.samples" : [ {"participant_id" : "sc-RRBS-zygote_01",
                               "bs_fastq1" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_01_R1.fastq.gz",
                               "bs_fastq2" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_01_R2.fastq.gz"},
                              {"participant_id" : "sc-RRBS-zygote_02",
                               "bs_fastq1" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_02_R1.fastq.gz",
                               "bs_fastq2" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_02_R2.fastq.gz"},
                              {"participant_id" : "sc-RRBS-zygote_03",
                               "bs_fastq1" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_03_R1.fastq.gz",
                               "bs_fastq2" : "/home/adunford/Mus_musculus/sc-RRBS-zygote_03_R2.fastq.gz"}],

    "methpipeagg.qc_report.outdir" : "."
}

Viewing all articles
Browse latest Browse all 1147

Trending Articles