# cluster function on matrix and return hierarchical plot
# x matrix each column is a sample
# dist.method method to get the distance between samples
# hclust.method the agglomeration method to be used
# plot if TRUE, plot the hierarchical clustering
.cluster=function(x, dist.method="correlation", hclust.method="ward", plot=TRUE,
treatment=treatment,sample.ids=sample.ids,context, my.cols){
DIST.METHODS <- c("correlation", "euclidean", "maximum", "manhattan", "canberra",
"binary", "minkowski")
dist.method <- pmatch(dist.method, DIST.METHODS)
HCLUST.METHODS <- c("ward", "single", "complete", "average", "mcquitty",
"median", "centroid")
hclust.method <- pmatch(hclust.method, HCLUST.METHODS)
if (
is.na(hclust.method))
stop("invalid clustering method")
if (hclust.method == -1)
stop("ambiguous clustering method")
if(DIST.METHODS[dist.method] == "correlation")
d = .dist.cor(t(x))
else
d=dist(scale(t(x)), method=DIST.METHODS[dist.method]);
hc=hclust(d, HCLUST.METHODS[hclust.method]);
if(plot){
#plclust(hc,hang=-1, main=paste("CpG dinucleotide methylation clustering\nDistance method: ",
# DIST.METHODS[dist.method],sep=""), xlab = "Samples");
# plot
treatment=treatment
sample.ids=sample.ids
col.list=as.list(my.cols[treatment+1])
names(col.list)=sample.ids
colLab <- function(n,col.list)
{
if(is.leaf(n))
{
a <- attributes(n)
attr(n, "nodePar") <- c(a$nodePar, list(lab.col =
col.list[[a$label]], lab.cex=1,
col=col.list[[a$label]], cex=1, pch=16 ))
}
n
}
dend = as.dendrogram(hc)
dend_colored <- dendrapply(dend, colLab,col.list)
plot(dend_colored, main = paste(context, "methylation clustering"));
# end of plot
}
return(hc)
}
setGeneric("clusterSamples", function(.Object, dist="correlation", method="ward",
sd.filter=TRUE,sd.threshold=0.5,
filterByQuantile=TRUE, plot=TRUE, my.cols=rainbow(length(unique(treatment)), start=1, end=0.6))
standardGeneric("clusterSamples"))
#' @rdname clusterSamples-methods
#' @aliases clusterSamples,methylBase-method
setMethod("clusterSamples", "methylBase",
function(.Object, dist, method ,sd.filter, sd.threshold,
filterByQuantile, plot, my.cols)
{
mat =getData(.Object)
# remove rows containing NA values, they might be introduced at unite step
mat =mat[ rowSums(
is.na(mat))==0, ]
meth.mat = mat[, .Object@numCs.index]/
(mat[,.Object@numCs.index] + mat[,.Object@numTs.index] )
names(meth.mat)=.Object@sample.ids
# if Std. Dev. filter is on remove rows with low variation
if(sd.filter){
if(filterByQuantile){
sds=rowSds(as.matrix(meth.mat))
cutoff=quantile(sds,sd.threshold)
meth.mat=meth.mat[sds>cutoff,]
}else{
meth.mat=meth.mat[rowSds(as.matrix(meth.mat))>sd.threshold,]
}
}
.cluster(meth.mat, dist.method=dist, hclust.method=method,
plot=plot, treatment=.Object@treatment,
sample.ids=.Object@sample.ids,
context=.Object@context, my.cols)
}
)
(I'm sure this is overly simplistic and that you could come up with a better solution. No need to cite any contribution, but figured I'd share how I'm doing it.)