Qiime 1.8.0 biom files work in Phyloseq, but Qiime 1.9.0 biom files do not.
Here is the error.
Error in fromJSON(content, handler, default.size, depth, allowComments, :
invalid JSON input
I suppose that I can use differential_abundance.py now, but I like the figures that phyloseq/ggplot2 makes
as well as my MA plots and DESeq2's 2D PCoA plots based solely on OTU abundance (euclidean distances).
I wonder if QIIME could add some figure options to this module. Also, I are the taxonomies included in the ouput OTU tables from differential_abundance.py?
I am going to look at this today.
Here is my R code that I use when implementing phyloseq. See additional comments and additional R code below for multifactor DESeq2.
> data <- import_biom("IAvsIP.biom")
> x <- read.table("IAvsIP.txt", header=TRUE, sep="\t", row.names=1)
> x
Treatment Type Animal MergedReads
Phage-Cecum-18-F11 Phage Cecum 18 65853
Phage-Cecum-14-E11 Phage Cecum 14 106799
Phage-Cecum-11-B09 Phage Cecum 11 106897
Phage-Cecum-23-H11 Phage Cecum 23 109586
Phage-Cecum-12-C10 Phage Cecum 12 113183
Antibiotic-Cecum-7-B12 Antibiotic Cecum 7 37891
Antibiotic-Cecum-17-F02 Antibiotic Cecum 17 131735
Antibiotic-Cecum-20-G02 Antibiotic Cecum 20 149945
Antibiotic-Cecum-22-H04 Antibiotic Cecum 22 293886
Antibiotic-Cecum-15-E02 Antibiotic Cecum 15 470301
> sampledata = sample_data(data.frame(x))
> data2 = merge_phyloseq(data, sampledata)
> data2
phyloseq-class experiment-level object
otu_table() OTU Table: [ 28733 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 4 sample variables ]
tax_table() Taxonomy Table: [ 28733 taxa by 7 taxonomic ranks ]
> diagdds = phyloseq_to_deseq2(data2, ~ Treatment)
converting counts to integer mode
> diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
> res = results(diagdds, cooksCutoff = FALSE)
> alpha = 0.05
> sigtab = res[which(res$padj < alpha), ]
> sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(data2)[rownames(sigtab), ], "matrix"))
> dim(sigtab)
[1] 75 13
> res = cbind(as(res, "data.frame"), as(tax_table(data2)[rownames(res), ], "matrix"))
> res = cbind(as(res, "data.frame"), as(otu_table(data2)[rownames(res), ], "matrix"))
> write.table(res, file = "DESeq2_Output_IAvsIP.txt", sep = "\t", col.names=NA)
> theme_set(theme_bw())
> scale_fill_discrete <- function(palname = "Set1", ...) {
+ scale_fill_brewer(palette = palname, ...)
+ }
> x = tapply(sigtab$log2FoldChange, sigtab$Rank2, function(x) max(x))
> x = sort(x, TRUE)
> sigtab$Phylum = factor(as.character(sigtab$Rank2), levels=names(x))
> x = tapply(sigtab$log2FoldChange, sigtab$Rank6, function(x) max(x))
> x = sort(x, TRUE)
> sigtab$Genus = factor(as.character(sigtab$Rank6), levels=names(x))
> pdf(file = "IAvsIP.pdf")
> ggplot(sigtab, aes(x=Genus, y=log2FoldChange, color=Phylum)) + geom_point(size=6) +
+ theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
> dev.off()
null device
1
> pdf(file="IAvsIP_MA.pdf")
> plot(res$baseMean,res$log2FoldChange,ylim=c(-12,12),main="IAvsIP", log = "x", pch = 19, cex = ifelse(res$padj < .05, 0.6, 0.4), col = ifelse(res$padj < .05, "red", "black"), xlab="mean OTU abundance", ylab = "log fold change", xlim = c(1,10000))
Warning message:
In xy.coords(x, y, xlabel, ylabel, log) :
22544 x values <= 0 omitted from logarithmic plot
> dev.off()
null device
1
> rld <- rlog(diagdds)
> pdf(file="IAvsIP_PCA.pdf")
> plotPCA(rld, intgroup="Treatment")
> dev.off()
null device
1
*****
I have also done mutlifactor DESeq2. It is also important to know what group the log2 ratios are relative to. Note code in bold Red.
*****
library(DESeq2)
library("phyloseq")
library("ggplot2")
library("plyr")
data <- import_biom("AllCvsAllP.biom")
x <- read.table("AllCvsAllP.txt", header=TRUE, sep="\t", row.names=1)
sampledata = sample_data(data.frame(x))
data2 = merge_phyloseq(data, sampledata)
data2
diagdds = phyloseq_to_deseq2(data2, ~ Type + Treatment)
diagdds$Treatment <- relevel(diagdds$Treatment, "Control")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(data2)[rownames(sigtab), ], "matrix"))
dim(sigtab)
res = cbind(as(res, "data.frame"), as(tax_table(data2)[rownames(res), ], "matrix"))
res = cbind(as(res, "data.frame"), as(otu_table(data2)[rownames(res), ], "matrix"))
write.table(res, file = "DESeq2_Output_AllCvsAllP.txt", sep = "\t", col.names=NA)
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
x = tapply(sigtab$log2FoldChange, sigtab$Rank2, function(x) max(x))
x = sort(x, TRUE)
sigtab$Phylum = factor(as.character(sigtab$Rank2), levels=names(x))
x = tapply(sigtab$log2FoldChange, sigtab$Rank6, function(x) max(x))
x = sort(x, TRUE)
sigtab$Genus = factor(as.character(sigtab$Rank6), levels=names(x))
pdf(file = "AllCvsAllP.pdf")
ggplot(sigtab, aes(x=Genus, y=log2FoldChange, color=Phylum)) + geom_point(size=6) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
pdf(file="AllCvsAllP_MA.pdf")
plot(res$baseMean,res$log2FoldChange,ylim=c(-12,12),main="AllCvsAllP", log = "x", pch = 19, cex = ifelse(res$padj < .05, 0.6, 0.4), col = ifelse(res$padj < .05, "red", "black"), xlab="mean OTU abundance", ylab = "log fold change", xlim = c(1,10000))
dev.off()
rld <- rlog(diagdds)
pdf(file="AllCvsAllP_PCA.pdf")
plotPCA(rld, intgroup=c("Treatment", "Type"))
dev.off()