LSD bars by time extraction from pylr then plotting

Brandon Hurr

unread,

Jul 13, 2010, 2:13:01 PM7/13/10

to ggplot2

Hi all,

I'm trying to create a series of graphs for a publication wherein we have measurements over time. I'm using the agricolae package to find the LSD value by day between treatments. I have no idea how to get the values out using l_ply() and then put those error bars onto the graph.

Any ideas?

Thanks,

Brandon

#Begin R code

#Load required packages

require(agricolae)

require(ggplot2)

require(plotrix)

#Data

cuke<-structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,

1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,

3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,

2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,

2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,

4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L

), .Label = c("Con", "ConM", "Eth", "EthM"), class = "factor"),

Day = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,

0L, 0L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,

3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,

6L, 6L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,

9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,

12L, 12L, 12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 6L, 6L,

6L, 6L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,

12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,

0L, 0L, 0L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,

3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,

6L, 6L, 6L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,

9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,

12L, 12L, 12L, 12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L,

6L, 6L, 6L, 6L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L,

12L, 12L, 12L, 12L), Hue = c(123.79, 126.82, 123.24, 122.5,

125.44, 124.11, 125.99, 122.7, 119.09, 124.96, 125.46, 124.27,

125.92, 126.29, 125.57, 126.52, 125.38, 126.98, 125.22, 126.02,

126.01, 124.73, 124.13, 127.15, 126.78, 126.92, 127.15, 126.44,

126.59, 126.63, 123.87, 121.96, 123.2, 123.43, 122.98, 124.48,

123.74, 123.96, 124.32, 124.32, 125.04, 126.46, 126.66, 125.51,

121.44, 123.19, 124.46, 124.57, 122.62, 123.56, 122.81, 124.29,

124.16, 122.25, 123.14, 124.1, 124.02, 123.64, 124.98, 124.8,

123.19, 124.46, 124.57, 122.62, 123.56, 122.81, 124.29, 124.16,

122.25, 123.14, 124.1, 124.02, 123.64, 124.98, 123.79, 126.82,

122.5, 125.44, 125.99, 122.7, 119.09, 123.57, 122.82, 125.18,

119.55, 123.26, 119.37, 118.79, 115.25, 115.67, NA, NA, NA,

NA, NA, NA, NA, NA, NA, 123.99, 123.89, 124.76, 121.45, 123.06,

125.55, 127, 123.92, 124.29, 120.91, 125.69, 124.25, 124.11,

123.67, 123.21, 123.56, 126.69, 126.14, 125.37, 126.76, 127.41,

122.39, 122.65, 122.24, 123.11, 125.63, 123.21, 123.39, 123.08,

123.54, 124.69, 123.21, 121.3, 121.82, 123.9, 123.42, 123.92,

119.89, 120.68, 119.42, 119.57, 119.85, 123.17, 125.96, 125.75,

121.14, 125.01, 124.03, 124.56, 121.84, 120.25, 118.74, 121.94,

117.09, 119.7, 111.07, 118.03, 119.32, 118.92, 121.07, 114.65,

111.78, 113.44, 115.05, 112.74, 115.45, 121.8, 120.91, 122.49,

123.99, 118.47, 123.31, 116.42, 114.15, 123.99, 123.89, 121.45,

123.06, 127, 123.92, 124.29, 119.01, 116.52, 113.18, 113.56,

107.57, 107.52, 112.53, 117.23, 117.32, 95.86572469, 101.1846093,

101.8936238, 106.3913542, 105.6866304, 99.76811364, 107.0792098,

108.0885293, 95.76880979)), .Names = c("Treatment", "Day",

"Hue"), class = "data.frame", row.names = c(NA, -198L))

#returns a list with the aov by day in a list

cukebyday.aov <- dlply(cuke, .(Day), function(x) aov(Hue ~ Treatment, data=x))

#returns critical LSD Mean comparison

l_ply(res.aov, function(x) LSD.test(x, "Treatment"))

#Need to get LSD values from l_ply or something

#computes mean and SEM for plotting

dataset.avg<-ddply(cuke, c("Treatment", "Day"), function(df) return(c(dataset.avg=mean(df[[3]],na.rm=TRUE), dataset.se=std.error(df[[3]]), na.rm=TRUE)))

#plots mean and SEM (want LSD)

avg.plot<-qplot(x=Day, dataset.avg, data=dataset.avg, geom=c("line","point"), ylab="Hue", main="Cuke Hue", color=Treatment)

avg.plot<- avg.plot+geom_errorbar(aes(ymax=dataset.avg+dataset.se, ymin=dataset.avg-dataset.se), width=0.5)+theme_bw()+opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))

avg.plot

Joe

unread,

Jul 14, 2010, 11:19:14 AM7/14/10

to ggplot2

I'm not sure exactly what this test is, but using ldply is probably
the first step you want.

lsd <- ldply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

and now, I'm guessing this is the plot you want, based on the LSD.test
man page.

p <- ggplot(lsd, aes(trt, means))+
geom_bar(aes(fill = M))+
facet_wrap(~Day)

There's some funkyness with the levels of M, so you might want to do
something like

levels(lsd$M) <- sub(" *", "", levels(lsd$M))

to clean that up.

Brandon Hurr

unread,

Jul 14, 2010, 1:18:44 PM7/14/10

to Joe, ggplot2

Joe,

Thanks for the tips. What you have done is really interesting. Coloring by grouping isn't something I was considering since it's likely the publication will be in black and white, but I might throw it at the old advisor anyway for S's & giggles.

What I'm actually trying to do is to graph the trend over time on a single graph with the four treatments, which is why I used geom_line and geom_point. (I really need to get away from using qplot.) Instead of SEM error bars, I'd like to use LSD or some other means separation test to make error bars for each day so that you can see if there is significance or not between treatments on a given day. Does that make sense?

It looks like your "lsd" output has most all of the information I need to do that. However, if you read the l_ply() output the Least significant difference line is the number I'm interested in.

Study:

LSD t Test for Hue

Mean Square Error: 12.32855

Treatment, means and individual ( 95 %) CI

Hue std.err replication LCL UCL

Con 123.6993 0.2155098 14 123.26132 124.1373

ConM 117.4750 1.1348683 14 115.16867 119.7813

EthM 102.4141 1.5648717 9 99.23387 105.5943

alpha: 0.05 ; Df Error: 34

Critical Value of t: 2.032245

Least Significant Difference 2.936135

Harmonic Mean of Cell Sizes 11.8125

Means with the same letter are not significantly different.

Groups, Treatments and means

a Con 123.6993

b ConM 117.475

c EthM 102.4141

It's my first time trying to use the plyr functions, so I'm not sure I quite understand why the l_ply function and ldply function had such different outputs, and I'm still not quite sure how to get the LSD value out.

I don't suppose there is a section in the ggplot2 book about the plyr functions? I just bought it and I'm looking forward to understanding more.

Thanks,

Brandon

###New code

lsd.l_ply<-l_ply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

#Returns data frame with mean, SEM, grouping factor, UCI, LCI

lsd <- ldply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

p <- ggplot(lsd, aes(Day, means))+

geom_line(aes(color= trt))+

geom_point(aes(shape = trt)) +

geom_errorbar(aes(ymax=UCI, ymin=LCI, color=M), width=0.5)+

opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))

p

##END NEW CODE

--
You received this message because you are subscribed to the ggplot2 mailing list.
Please provide a reproducible example: http://gist.github.com/270442

To post: email ggp...@googlegroups.com
To unsubscribe: email ggplot2+u...@googlegroups.com
More options: http://groups.google.com/group/ggplot2

Josef Fruehwald

unread,

Jul 14, 2010, 1:37:39 PM7/14/10

to Brandon Hurr, ggplot2

Brandon,

With the plyr functions, the first letter indicates the input data format, and the second letter indicates the output data format. l_ply() produces no output. The stuff it prints is just what LSD.test() prints when it's run, but apparently the object it produces isn't that.

With ldply(), you're saying, "Take every element of the input list, apply a function, coerce the output of that function into a data frame, then paste it all together." I just hoped that the output of LSD.test() would be nicely coercible into a dataframe, and it was.

As for your plot, I would not mix what the colors mean. In your plot, you use them to indicate both treatment and lsd group. I'd indicate lsd group with the point shape, then connect treatments with the same colored line, then add error bars in the same color as the treatment.

p <- ggplot(lsd, aes(Day, means))+

  geom_line(aes(colour = trt))+
  geom_errorbar(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5)+
  geom_point(aes(shape = M),size = 3)

Josef Fruehwald

unread,

Jul 14, 2010, 1:40:23 PM7/14/10

to Brandon Hurr, ggplot2

Also, geom_ribbon() creates more visual noise, but I sometimes prefer its appearance to errorbars.

ggplot(lsd, aes(Day, means))+
geom_line(aes(colour = trt))+

geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+

geom_point(aes(shape = M),size = 3)

Brandon Hurr

unread,

Jul 14, 2010, 1:59:31 PM7/14/10

to Josef Fruehwald, ggplot2

I share your appreciation of geom_ribbon() as it's certainly easier to make out which is significantly different from what as long as you don't have too many treatments.

The reason I did the colors for both was that if you had only:

p <- ggplot(lsd, aes(Day, means))+

geom_line()+

geom_point(aes(shape = trt)) +

geom_errorbar(aes(ymax=UCI, ymin=LCI, color=M), width=0.5)+

opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))

p

It doesn't connect the lines by treatment. I couldn't figure out how to do it quickly so I just left it with extra colors to show what I was going for.

Any idea how to pull out the LSD number I highlighted? It seems like it doesn't make it into the ldply() dataframe. I suppose an alternative would be to use geom_text() to put the grouping next to each point.

> lsd

Day trt means M N std.err LCI UCI

1 0 Con 124.4100 a 9.545455 0.5102194 123.37881 125.4412

2 0 ConM 123.9833 a 9.545455 0.3944649 123.18609 124.7806

3 0 EthM 123.9429 a 9.545455 0.6261360 122.67739 125.2083

4 0 Eth 123.7614 a 9.545455 0.9961818 121.74807 125.7748

5 3 Con 126.1767 a 15.000000 0.2380890 125.68896 126.6644

6 3 ConM 124.3447 b 15.000000 0.4596343 123.40315 125.2862

7 6 Con 124.0913 a 7.500000 0.3754716 123.32984 124.8528

8 6 Eth 122.8760 a 7.500000 0.9220336 121.00603 124.7460

9 6 ConM 122.4367 a 7.500000 0.5780366 121.26435 123.6090

10 6 EthM 113.9680 b 7.500000 1.9190659 110.07595 117.8600

11 9 Con 123.7727 a 6.315789 0.2136273 123.33852 124.2068

12 9 ConM 120.1807 b 6.315789 0.8909639 118.37001 121.9913

13 9 Eth 117.2700 b 6.315789 1.0551777 115.12562 119.4144

14 9 EthM 113.6500 c 6.315789 2.3294599 108.91597 118.3840

15 12 Con 123.6993 a 11.812500 0.2155098 123.26132 124.1373

16 12 ConM 117.4750 b 11.812500 1.1348683 115.16867 119.7813

17 12 EthM 102.4141 c 11.812500 1.5648717 99.23387 105.5943

Brandon Hurr

unread,

Jul 14, 2010, 2:11:46 PM7/14/10

to Josef Fruehwald, ggplot2

Joe,

Thanks a ton. I'll keep plugging away and if I figure it out, post it here for posterity.

Brandon

On Wed, Jul 14, 2010 at 19:09, Josef Fruehwald <jofr...@gmail.com> wrote:

I would avoid using geom_text() to annotate points. In the code I had, the grouping is indicated by the point shape. If you don't like that so much, you might try:

ggplot(lsd, aes(Day, means))+

geom_line(aes(colour = trt))+
geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+

geom_text(aes(label = M))

But definitely don't use both point shape and group label.

As for getting the LSD number, I really have no idea. LSD.test() doesn't return an object which has that statistic in it, or extractable. Maybe you can calculate it from the output it does produce, but other than that I'm clueless.

-Joe

Josef Fruehwald

unread,

Jul 14, 2010, 2:09:59 PM7/14/10

to Brandon Hurr, ggplot2

I would avoid using geom_text() to annotate points. In the code I had, the grouping is indicated by the point shape. If you don't like that so much, you might try:

ggplot(lsd, aes(Day, means))+

geom_line(aes(colour = trt))+

geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+

geom_text(aes(label = M))

But definitely don't use both point shape and group label.

As for getting the LSD number, I really have no idea. LSD.test() doesn't return an object which has that statistic in it, or extractable. Maybe you can calculate it from the output it does produce, but other than that I'm clueless.

-Joe

Brandon Hurr

unread,

Jul 22, 2010, 12:09:48 PM7/22/10

to ggplot2

Dear all,

For posterity's sake I thought I'd put this up here. I've got a graph pretty close to what I want, just a bit more tweaking. The important part was that I figured out how to get the LSD values out of LSD.test() from the Agricolae package. I hacked it to force the output of the LSD value and created a new function called LSD.test.test(). With clear separation annotating the grouping with geom-text works pretty well too, although with much more complex data the groupings become a bit too long (cdefg, cdefgh, etc...)

Many thanks to Joe Fruehwald and Mike Lawrence for all their help.

Brandon

P.S. I think I've learned enough to be dangerous. :O

###Begin R code

LSD.test.test<-function (y, trt, DFerror, MSerror, alpha = 0.05, p.adj = c("none",

"holm", "hochberg", "bonferroni", "BH", "BY", "fdr"), group = TRUE,

main = NULL)

{

p.adj <- match.arg(p.adj)

clase <- c("aov", "lm")

name.y <- paste(deparse(substitute(y)))

name.t <- paste(deparse(substitute(trt)))

if ("aov" %in% class(y) | "lm" %in% class(y)) {

A <- y$model

DFerror <- df.residual(y)

MSerror <- deviance(y)/DFerror

y <- A[, 1]

ipch <- pmatch(trt, names(A))

if (is.na(ipch))

return(cat("Name: ", trt, "\n", names(A)[-1], "\n"))

name.t <- names(A)[ipch]

trt <- A[, ipch]

name.y <- names(A)[1]

}

junto <- subset(data.frame(y, trt), is.na(y) == FALSE)

means <- tapply.stat(junto[, 1], junto[, 2], stat = "mean")

sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")

nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")

std.err <- sds[, 2]/sqrt(nn[, 2])

Tprob <- qt(1 - alpha/2, DFerror)

LCL <- means[, 2] - Tprob * std.err

UCL <- means[, 2] + Tprob * std.err

means <- data.frame(means, std.err, replication = nn[, 2],

LCL, UCL)

names(means)[1:2] <- c(name.t, name.y)

ntr <- nrow(means)

nk <- choose(ntr, 2)

if (p.adj != "none") {

a <- 1e-06

b <- 1

for (i in 1:100) {

x <- (b + a)/2

d <- p.adjust(x, n = nk, p.adj) - alpha

fa <- p.adjust(a, n = nk, p.adj) - alpha

if (d * fa < 0)

b <- x

if (d * fa > 0)

a <- x

}

Tprob <- qt(1 - x/2, DFerror)

}

nr <- unique(nn[, 2])

cat("\nStudy:", main)

cat("\n\nLSD t Test for", name.y, "\n")

if (p.adj != "none")

cat("P value adjustment method:", p.adj, "\n")

cat("\nMean Square Error: ", MSerror, "\n\n")

cat(paste(name.t, ",", sep = ""), " means and individual (",

(1 - alpha) * 100, "%) CI\n\n")

print(data.frame(row.names = means[, 1], means[, -1]))

cat("\nalpha:", alpha, "; Df Error:", DFerror)

cat("\nCritical Value of t:", Tprob, "\n")

if (group) {

if (length(nr) == 1) {

LSD <- Tprob * sqrt(2 * MSerror/nr)

cat("\nLeast Significant Difference", LSD)

}

else {

nr1 <- 1/mean(1/nn[, 2])

LSD <- Tprob * sqrt(2 * MSerror/nr1)

cat("\nLeast Significant Difference", LSD)

cat("\nHarmonic Mean of Cell Sizes ", nr1)

}

cat("\nMeans with the same letter are not significantly different.")

cat("\n\nGroups, Treatments and means\n")

output <- order.group(means[, 1], means[, 2], means[,

4], MSerror, Tprob, means[, 3])

w <- order(means[, 2], decreasing = TRUE)

output <- data.frame(output, LCI = means[w, 5], UCI = means[w,

6], LSD)

}

if (!group) {

comb <- combn(ntr, 2)

nn <- ncol(comb)

dif <- rep(0, nn)

LCL1 <- dif

UCL1 <- dif

sig <- NULL

pvalue <- rep(0, nn)

for (k in 1:nn) {

i <- comb[1, k]

j <- comb[2, k]

if (means[i, 2] < means[j, 2]) {

comb[1, k] <- j

comb[2, k] <- i

}

dif[k] <- abs(means[i, 2] - means[j, 2])

sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j,

4]))

pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror))

if (p.adj != "none")

pvalue[k] <- p.adjust(pvalue[k], n = nk, p.adj)

pvalue[k] <- round(pvalue[k], 6)

LCL1[k] <- dif[k] - Tprob * sdtdif

UCL1[k] <- dif[k] + Tprob * sdtdif

sig[k] <- " "

if (pvalue[k] <= 0.001)

sig[k] <- "***"

else if (pvalue[k] <= 0.01)

sig[k] <- "**"

else if (pvalue[k] <= 0.05)

sig[k] <- "*"

else if (pvalue[k] <= 0.1)

sig[k] <- "."

}

tr.i <- means[comb[1, ], 1]

tr.j <- means[comb[2, ], 1]

output <- data.frame(Difference = dif, pvalue = pvalue,

sig, LCL = LCL1, UCL = UCL1)

rownames(output) <- paste(tr.i, tr.j, sep = " - ")

cat("\nComparison between treatments means\n\n")

print(output)

output <- data.frame(trt = means[, 1], means = means[,

2], M = "", N = means[, 4], std.err, LCL, UCL)

}

invisible(output)

}

#Returns data frame with mean, SEM, grouping factor, UCI, LCI

lsd <- ldply(cukebyday.aov, function(x) LSD.test.test(x, "Treatment"))

p <- ggplot(lsd, aes(Day, means, group=trt))+

geom_line(aes(linetype=trt))+

geom_point(aes(shape = trt), size=4) +

geom_errorbar(aes(ymax=means+LSD, ymin=means-LSD), width=0.5)+

opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))+

geom_text(aes(x=Day, y=means, label=M, hjust=2, vjust=2))+

theme_bw()

p

Reply all

Reply to author

Forward