// Emits one [sample key, BAM path] pair per entry of samples_keys; the path is
// looked up in params.sample_bam_map.
sample_bam = Channel
    .from(samples_keys)
    .map { key -> [key, params.sample_bam_map[key]] }

// Emits one [sample key, index path] pair per entry of samples_keys; the index
// path is the mapped BAM path with ".bai" appended.
sample_bai = Channel
    .from(samples_keys)
    .map { key -> [key, params.sample_bam_map[key] + ".bai"] }
/*
 * Runs GATK HaplotypeCaller in GVCF mode for one sample over one cluster of
 * regions.
 *
 * Inputs:
 *   sample_bam   - [sample_key, BAM path] pairs; the path is passed as a value
 *                  (not staged as a file).
 *   sample_bai   - [sample_key, BAM index path] pairs; consumed in lock-step
 *                  with sample_bam. The index path is not referenced by the
 *                  script below.
 *   region_list  - one element of params.references.region_clusters per task
 *                  (the process is repeated for each cluster).
 *   genome, genome_index, genome_dict, dbsnp, dbsnp_index
 *                - files taken from same-named variables defined outside this
 *                  process.
 *
 * Output (region_gvcf_ch1):
 *   [sample_key, region_no, BED file, bgzipped GVCF, GVCF .tbi index]
 *
 * Steps: write the regions to a BED file, subset the sample BAM to those
 * regions with samtools, index the subset, then call variants on it.
 */
process gatk_hc {
    tag "region $region_no for sample $sample_key"
    cpus 4
    // The request has to cover the JVM heap handed to GATK (see java_heap_gb
    // below) plus non-heap overhead. Previously 4 GB was requested while the
    // JVM was allowed an 8 GB heap.
    memory '10 GB'
    errorStrategy 'finish'
    cache 'deep'

    input:
    set sample_key, val(sample) from sample_bam
    // Binds sample_key a second time; both channels are expected to emit the
    // samples in the same order.
    set sample_key, val(sample_bai) from sample_bai
    each region_list from params['references']['region_clusters']
    file(genome)
    file(genome_index)
    file(genome_dict)
    file(dbsnp)
    file(dbsnp_index)

    output:
    set sample_key, region_no, file("reg-${region_no}.bed"), file("reg-${region_no}.g.vcf.gz"), file("reg-${region_no}.g.vcf.gz.tbi") into region_gvcf_ch1

    script:
    // Convert each "<contig>:<start>-<end>" region into a tab-separated BED
    // line. The pattern is anchored at the end of the string so that ':' or
    // '-' inside the contig name are left intact. Anything that does not
    // match falls back to the plain character replacement used before.
    // NOTE(review): coordinates are copied verbatim; BED starts are 0-based,
    // so confirm the region strings already follow that convention.
    bed_str = region_list.collect { region ->
        def text = region.toString()
        def parsed = (text =~ '^(.+):(\\d+)-(\\d+)$')
        if (parsed.matches()) {
            return [parsed.group(1), parsed.group(2), parsed.group(3)].join("\t")
        }
        return text.replace(":", "\t").replace("-", "\t")
    }.join("\n")

    // Short identifier for this region cluster: the first 9 characters of
    // whatever generateMD5_A (defined elsewhere) returns for the list's
    // string form.
    region_no = generateMD5_A(region_list.toString())[0..8]

    // Heap size follows the task's memory request, leaving 2 GB of headroom,
    // so memory overrides in the config also reach the JVM. With the default
    // request above this yields the original 8 GB heap.
    java_heap_gb = Math.max(1L, task.memory.toGiga() - 2L)
    """
    echo "${bed_str}" > reg-${region_no}.bed;
    samtools view -b -L reg-${region_no}.bed ${sample} > ${sample_key}.bqsr.reg-${region_no}.bam;
    samtools index ${sample_key}.bqsr.reg-${region_no}.bam;
    gatk-launch --java-options "-Xmx${java_heap_gb}G -XX:ConcGCThreads=${task.cpus} -XX:+UseConcMarkSweepGC -XX:ParallelGCThreads=${task.cpus}" HaplotypeCaller -R ${genome} --dbsnp ${dbsnp} -I ${sample_key}.bqsr.reg-${region_no}.bam -L reg-${region_no}.bed --emit-ref-confidence GVCF -O reg-${region_no}.g.vcf.gz
    """
}
In the Nextflow config, I tried the following: