I get an error running DE analysis on my single cell RNA-Seq data:
> fpkm_matrix <- read.table('genes.fpkm_table', sep="\t", header = TRUE, row.names=1) # fpkm matrix
> sample_sheet<- read.delim("samples.table", row.names=1) # sample sheet
> gene_annotations<- read.delim("genes.attr_table", row.names=1) # gene annotations
>
> fd<- new("AnnotatedDataFrame", data = gene_annotations)
>
> pheno.data <- colnames(fpkm_matrix)
> pheno.data <- unlist(lapply(pheno.data, function(x) strsplit(x, '_')[[1]][1]))
> pheno.data.df <- data.frame(type=pheno.data)
> rownames(pheno.data.df) <- colnames(fpkm_matrix)
> pd <- new('AnnotatedDataFrame', data = pheno.data.df)
>
> CDS <- new('CellDataSet', exprs = as.matrix(fpkm_matrix), phenoData = pd, featureData = fd) # Create new CellDataSet object
> CDS <- detectGenes(CDS, min_expr = 0.1) # how many are expressed by a particular gene or how many genes are expressed by a given cell
>
> CDS
CellDataSet (storageMode: lockedEnvironment)
assayData: 39171 features, 90 samples
element names: exprs
protocolData: none
phenoData
sampleNames: GFP_2 GFP_5 ... GFPminus_22 (90 total)
varLabels: type num_genes_expressed
varMetadata: labelDescription
featureData
featureNames: ENSMUSG00000000001 ENSMUSG00000000003 ...
ENSMUSG00000099334 (39171 total)
fvarLabels: class_code nearest_ref_id ... num_cells_expressed (8
total)
fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:
> diff_test_res <- differentialGeneTest(CDS, fullModelFormulaStr = "expression~type")
Error in if (expressionFamily@vfamily %in% c("zanegbinomialff", "negbinomial", :
argument is of length zero
> head(pData(CDS))
type num_genes_expressed
GFP_2 GFP 3293
GFP_5 GFP 5747
GFP_14 GFP 8242
GFP_48 GFP 4716
GFP_34 GFP 5874
GFP_7 GFP 3738
Could you please let me know if I missed something here.
Thanks
Sharvari
library("monocle")
library("reshape")
fpkm_matrix <- read.table('genes.fpkm_table', sep="\t", header = TRUE, row.names=1) # fpkm matrix
sample_sheet<- read.delim("samples.table", row.names=1) # sample sheet
gene_annotations<- read.delim("genes.attr_table", row.names=1) # gene annotations
fd<- new("AnnotatedDataFrame", data = gene_annotations)
pheno.data <- colnames(fpkm_matrix)
pheno.data <- unlist(lapply(pheno.data, function(x) strsplit(x, '_')[[1]][1]))
pheno.data.df <- data.frame(type=pheno.data)
rownames(pheno.data.df) <- colnames(fpkm_matrix)
pd <- new('AnnotatedDataFrame', data = pheno.data.df)
CDS <- newCellDataSet(as.matrix(fpkm_matrix), phenoData = pd, featureData = fd) # Create new CellDataSet object
CDS <- detectGenes(CDS, min_expr = 0.1) # how many are expressed by a particular gene or how many genes are expressed by a given cell
# DE analysis
log <- capture.output({diff_test_res <- differentialGeneTest(CDS, fullModelFormulaStr = "expression~type")})
# Select genes that are significant at an FDR < 10%
sig_genes <- subset(diff_test_res, qval < 0.1)
Hope this helps.
Thanks
Sharvari
--
You received this message because you are subscribed to a topic in the Google Groups "Monocle" group.
To unsubscribe from this topic, visit https://groups.google.com/d/topic/monocle-js/GvXuJscQgQk/unsubscribe.
To unsubscribe from this group and all its topics, send an email to monocle-js+...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
library("monocle")
library("reshape")
fpkm_matrix <- read.table('genes.fpkm_table', sep="\t", header = TRUE, row.names=1) # fpkm matrix
sample_sheet<- read.delim("samples.table", row.names=1) # sample sheet
gene_annotations<- read.delim("genes.attr_table", row.names=1) # gene annotations
fd<- new("AnnotatedDataFrame", data = gene_annotations)
pheno.data <- colnames(fpkm_matrix)
pheno.data <- unlist(lapply(pheno.data, function(x) strsplit(x, '_')[[1]][1]))
pheno.data.df <- data.frame(type=pheno.data)
rownames(pheno.data.df) <- colnames(fpkm_matrix)
pd <- new('AnnotatedDataFrame', data = pheno.data.df)
CDS <- newCellDataSet(as.matrix(fpkm_matrix), phenoData = pd, featureData = fd) # Create new CellDataSet object
CDS <- detectGenes(CDS, min_expr = 0.1) # how many are expressed by a particular gene or how many genes are expressed by a given cell
# DE analysis
log <- capture.output({diff_test_res <- differentialGeneTest(CDS, fullModelFormulaStr = "expression~type")})
# Select genes that are significant at an FDR < 10%
sig_genes <- subset(diff_test_res, qval < 0.1)
Hope this helps.
Thanks