#!/usr/bin/env Rscript
# CIPHER DGE Script
# Input from CIPHER is several folders under a single directory located
# in results/rnaseq/salmon ... folders are named using ID from CIPHER
# ARGUMENTS: 1 -> directory of salmon folders
# 2 -> experiment file
# Import libraries
library("DESeq2")
library("tximport")
library("readr")
library("ReportingTools")
library("AnnotationDbi")
library("org.Hs.eg.db")
library("biomaRt")
# Set up variable to control command line arguments
args <- commandArgs(TRUE)
# Set up variable for the directory holding our sample folders
sample_id <- dir(args[1])
sample_id
# Set up variable to hold folder directory names
salmon_dirs <- sapply(sample_id, function(id) file.path(args[1], id, "quant.sf"))
salmon_dirs
# Read in experiment file
exp_file <- read.table(args[2], header = TRUE, stringsAsFactors=FALSE)
# Set up tximport annotation base
library(EnsDb.Hsapiens.v79)
txdf <- transcripts(EnsDb.Hsapiens.v79, return.type="DataFrame")
tx2gene <- as.data.frame(txdf[,c("tx_id", "gene_id")])
# Import files using tximport
txi.salmon <- tximport(salmon_dirs, type = "salmon", tx2gene = tx2gene, reader = read_tsv)
# Create a sampleTable of conditions
sampleTable <- data.frame(condition = factor(exp_file$condition))
rownames(sampleTable) <- colnames(txi.salmon$counts)
# Create DESeqDataset from Tximport
dds <- DESeqDataSetFromTximport(txi.salmon, sampleTable, ~condition)
dds <- DESeq(dds)
rld <- rlog(dds)
# Generate RESULTS from DESeq2 object
res <- results(dds)
res.05 <- results(dds, alpha=.05)
resSig <- subset(res, padj < 0.05)
resSig <- as.data.frame(resSig)
resSig$ensemblID <- rownames(resSig)
# Annotation of results
res$symbol <- mapIds(org.Hs.eg.db, keys=row.names(res),
column="SYMBOL",
keytype="ENSEMBL",
multiVals="first")
resSig$symbol <- mapIds(org.Hs.eg.db, keys=row.names(res),
column="SYMBOL",
keytype="ENSEMBL",
multiVals="first")
# Use BioMart to Add Ensembl Gene Annotations
mart <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
add.anns <- function(df, mart, ...) {
nm <- rownames(df)
anns <- getBM(
attributes = c("ensembl_gene_id", "hgnc_symbol", "description"),
filters = "ensembl_gene_id", values = nm, mart = mart)
anns <- anns[match(nm, anns[, 1]), ]
colnames(anns) <- c("ID", "Gene Symbol", "Gene Description")
df <- cbind(anns, df[, -1, drop=FALSE])
rownames(df) <- nm
df
}
# Generate visualization plots
maplot <- plotMA(res.05, ylim=c(-5,5))
pcaplot <- plotPCA(rld, intgroup="condition")
# Generate an HTML report
htmlRep <- HTMLReport(shortName = "rnaseq_dge_report",
title = "CIPHER RNA-seq Report",
reportDirectory = "./reports")
publish("Table of top 1000 differentially expressed genes and their associated individual gene counts (boxplots)", htmlRep)
publish(dds, htmlRep, factor = colData(dds)$condition,
.modifyDF = list(add.anns, modifyReportDF), mart = mart,
reportDir="./reports")
publish("Table of all differentially expressed genes with a p adjusted value of < 0.05", htmlRep)
publish(resSig, htmlRep)
finish(htmlRep)