cat GMGCbin.ngl
ngless "1.5"
local import "gmgc" version "1.0"
import "parallel" version "1.1"
import "samtools" version "1.0"
samples=load_sample_list('gmgc_bam.txt')
input = samfile(samples)
current=input.name()
##GMGC
##"How counts are adjusted in the presence of multiple annotations is defined by the multiple argument. Generally, for obtaining gene abundances, distribution of multiple mappers is the best (using multiple={dist1}), while for functional annotations, you want to count them all (using multiple={all1}). This implies that the functional annotations will sum to a higher value than the number of reads. This may seem strange at first, but it is the intended behaviour."
#gmgc_mapped = map (non_human_reads, reference='gmgc:no-rare',mode_all=True,block_size_megabases=10000)
#gmgc_mapped = map (non_human_reads, reference='gmgc',mode_all=True,block_size_megabases=10000)
#gmgc_mapped = map (input, reference='gmgc',mode_all=True)
#gmgc_mapped_post = select(gmgc_mapped) using |mr|:
# mr = mr.filter(min_match_size=45, min_identity_pc=95, action={drop})
# if not mr.flag({mapped}):
# discard
#write(mapstats(gmgc_mapped_post),ofile=current+'gmgc.full.stats.txt')
#write(gmgc_mapped_post,ofile=current+'gmgc.bam')
#Predicted_taxonomic_group
gmgc_counts_new = count(gmgc_mapped_post,
features=['seqname'],
multiple={all1},
normalization={raw},
discard_zeros=True)
collect(gmgc_counts_new,
current=current,
ofile='gmgc'</>'Metacardis_gmgc_genemap.all1.raw.txt')
head gmgc_bam.txt
gmgc/M0x10MCx1418filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx1472filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx1857filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx1985filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx2066filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx2112filtered_qc.fq.gzgmgc.bam
gmgc/M0x10MCx2152filtered_qc.fq.gzgmgc.bam
Thanks in advance!
Best wishes,
Ulrike
I now want to skip the mapping, read in the bam files and count and collect the results with different annotations.
However, I am not really sure how to accomplish this task. Can anyone share the snippets with me? This is what I've started with:cat GMGCbin.ngl
ngless "1.5"
local import "gmgc" version "1.0"
import "parallel" version "1.1"
import "samtools" version "1.0"
samples=load_sample_list('gmgc_bam.txt')
input = samfile(samples)
current=input.name()
gmgc_counts_new = count(gmgc_mapped_post,
features=['seqname'],
multiple={all1},
normalization={raw},
discard_zeros=True)
collect(gmgc_counts_new,
current=current,
ofile='gmgc'</>'Metacardis_gmgc_genemap.all1.raw.txt')