> fileName = "data/kpgpB_chr20.vcf.gz"
> hasIndex(fileName)
[1] TRUE
> tbxKpgpchr20 = TabixFile(fileName)
> headerTabix(tbxKpgpchr20)
... (this works and gives apparently normal looking output)
> param <- GRanges("1", IRanges(1,width=7000000))
> resKpgp <- scanTabix(tbxKpgpchr20, param=param)[[1]]
##-------------------------------- the error -----------------------------------##
Error: scanTabix: '1' not present in tabix index
path: /vol/perry/NGS_CAMDA2013-Remote/data/kpgpB_chr20.vcf.gz
##-------------------------------- the error -----------------------------------##
Example 2:
> fileName = "data/kpgpB_chr1.vcf.gz"
> hasIndex(fileName)
# [1] TRUE
> tbxKpgp = TabixFile(fileName)
> param <- GRanges("1", IRanges(1,width=25000000))
> resKpgp <- scanTabix(tbxKpgp, param=param)[[1]]
## there are about 100k variants called in chromosome 1
str(resKpgp)
# chr [1:100854] "1\t10150\t-\tC\tT\t3.01\tNN\tAC=1;AF1=0.4997;AN=2;CI95=0.5,0.5;DP4=5,4,2,2;DP=23;FQ=4.77;MQ=35;PV4=1,0.013,1,1;SF=18\tGT:DS:GL\"| __truncated__ ...
> vcfKpgp = readVcf(tbxKpgp,"hg19",param)
##-------------------------------- the error -----------------------------------##
# Warning message:
# In doTryCatch(return(expr), name, parentenv, handler) :
# record 1 (and others?) FORMAT 'DS' not found
##-------------------------------- the error -----------------------------------##
Example 3:
> fileName = "data/KPGP1000_chr1.vcf.gz"
> hasIndex(fileName)
# [1] TRUE
> tbx = TabixFile(fileName)
## according to wikipedia, chr1 spans about 250 million bases
> param <- GRanges("1", IRanges(1,width=250000000))
> res <- scanTabix(tbx, param=param)[[1]]
## there are just over 3 million mutations listed
> length(res)
## this works
> param <- GRanges("1", IRanges(1,width=50000))
> vcf = readVcf(tbx,"hg19",param)
> vcf
# class: VCF
# dim: 52 1131
# genome: hg19
# exptData(1): header
# fixed(4): REF ALT QUAL FILTER
# info(37): G3 HWE ... AC AN
# geno(7): GQ DP ... DS GL
# rownames(52): - - ... - -
# rowData values names(1): paramRangeID
# colnames(1131): KPGP10 KPGP117 ... NA20826 NA20828
# colData names(1): Samples
## this doesn't work
> param <- GRanges("1", IRanges(1,width=52000))
> vcf = readVcf(tbx,"hg19",param)
##-------------------------------- the error -----------------------------------##
# Error in .Call2("new_XStringSet_from_CHARACTER", classname, elementType, :
# key 48 (char '0') not in lookup table
##-------------------------------- the error -----------------------------------##
Sesion Info:
> sessionInfo()
R version 2.15.3 (2013-03-01)
Platform: x86_64-pc-linux-gnu (64-bit)
locale:
[1] C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggbio_1.6.6 ggplot2_0.9.3.1 seqminer_0.2
[4] vcf2geno_2.0 GenomicFeatures_1.10.2 AnnotationDbi_1.20.7
[7] Biobase_2.18.0 VariantAnnotation_1.4.12 Rsamtools_1.10.2
[10] Biostrings_2.26.3 GenomicRanges_1.10.7 IRanges_1.16.6
[13] BiocGenerics_0.4.0 rj_1.1.0-4
loaded via a namespace (and not attached):
[1] BSgenome_1.26.1 DBI_0.2-5 Hmisc_3.10-1 MASS_7.3-23
[5] RColorBrewer_1.0-5 RCurl_1.95-4.1 RSQLite_0.11.2 XML_3.4-0
[9] biomaRt_2.14.0 biovizBase_1.6.2 bitops_1.0-5 cluster_1.14.4
[13] colorspace_1.2-1 dichromat_2.0-0 digest_0.6.3 grid_2.15.3
[17] gridExtra_0.9.1 gtable_0.1.2 labeling_0.1 lattice_0.20-15
[21] munsell_0.4 parallel_2.15.3 plyr_1.8 proto_0.3-10
[25] reshape2_1.2.2 rj.gd_1.1.0-1 rtracklayer_1.18.2 scales_0.2.3
[29] stats4_2.15.3 stringr_0.6.2 tools_2.15.3 zlibbioc_1.4.0
--
You received this message because you are subscribed to the Google Groups "CAMDA 2013 discussions" group.
To unsubscribe from this group and stop receiving emails from it, send an email to camda2013discu...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.
library(VariantAnnotation)
fileName = "data/kpgpB_chr20.vcf.gz"
tbxKpgpchr20 = TabixFile(fileName)
## there are about 63 million bases
param <- GRanges("20", IRanges(1,width=70000000))
vcfKpgpChr20 = readVcf(tbxKpgpchr20,"hg19",param)
# Warning message:
# In doTryCatch(return(expr), name, parentenv, handler) :
# record 1 (and others?) FORMAT 'DS' not found
vcfKpgpChr20
# class: VCF
# dim: 216151 39
# genome: hg19
# exptData(1): header
# fixed(4): REF ALT QUAL FILTER
# info(17): G3 HWE ... AC AN
# geno(6): GT GQ ... SP PL
# rownames(216151): - - ... - -
# rowData values names(1): paramRangeID
# colnames(39): KPGP10 KPGP117 ... KPGP97 KPGP9
# colData names(1): Samples