LSD bars by time extraction from pylr then plotting

329 views
Skip to first unread message

Brandon Hurr

unread,
Jul 13, 2010, 2:13:01 PM7/13/10
to ggplot2
Hi all, 

I'm trying to create a series of graphs for a publication wherein we have measurements over time. I'm using the agricolae package to find the LSD value by day between treatments. I have no idea how to get the values out using l_ply() and then put those error bars onto the graph. 

Any ideas?

Thanks, 

Brandon

#Begin R code

#Load required packages
require(agricolae)
require(ggplot2)
require(plotrix)

#Data
cuke<-structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("Con", "ConM", "Eth", "EthM"), class = "factor"), 
Day = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 6L, 6L, 
6L, 6L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 
6L, 6L, 6L, 6L, 9L, 9L, 9L, 9L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L), Hue = c(123.79, 126.82, 123.24, 122.5, 
125.44, 124.11, 125.99, 122.7, 119.09, 124.96, 125.46, 124.27, 
125.92, 126.29, 125.57, 126.52, 125.38, 126.98, 125.22, 126.02, 
126.01, 124.73, 124.13, 127.15, 126.78, 126.92, 127.15, 126.44, 
126.59, 126.63, 123.87, 121.96, 123.2, 123.43, 122.98, 124.48, 
123.74, 123.96, 124.32, 124.32, 125.04, 126.46, 126.66, 125.51, 
121.44, 123.19, 124.46, 124.57, 122.62, 123.56, 122.81, 124.29, 
124.16, 122.25, 123.14, 124.1, 124.02, 123.64, 124.98, 124.8, 
123.19, 124.46, 124.57, 122.62, 123.56, 122.81, 124.29, 124.16, 
122.25, 123.14, 124.1, 124.02, 123.64, 124.98, 123.79, 126.82, 
122.5, 125.44, 125.99, 122.7, 119.09, 123.57, 122.82, 125.18, 
119.55, 123.26, 119.37, 118.79, 115.25, 115.67, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 123.99, 123.89, 124.76, 121.45, 123.06, 
125.55, 127, 123.92, 124.29, 120.91, 125.69, 124.25, 124.11, 
123.67, 123.21, 123.56, 126.69, 126.14, 125.37, 126.76, 127.41, 
122.39, 122.65, 122.24, 123.11, 125.63, 123.21, 123.39, 123.08, 
123.54, 124.69, 123.21, 121.3, 121.82, 123.9, 123.42, 123.92, 
119.89, 120.68, 119.42, 119.57, 119.85, 123.17, 125.96, 125.75, 
121.14, 125.01, 124.03, 124.56, 121.84, 120.25, 118.74, 121.94, 
117.09, 119.7, 111.07, 118.03, 119.32, 118.92, 121.07, 114.65, 
111.78, 113.44, 115.05, 112.74, 115.45, 121.8, 120.91, 122.49, 
123.99, 118.47, 123.31, 116.42, 114.15, 123.99, 123.89, 121.45, 
123.06, 127, 123.92, 124.29, 119.01, 116.52, 113.18, 113.56, 
107.57, 107.52, 112.53, 117.23, 117.32, 95.86572469, 101.1846093, 
101.8936238, 106.3913542, 105.6866304, 99.76811364, 107.0792098, 
108.0885293, 95.76880979)), .Names = c("Treatment", "Day", 
"Hue"), class = "data.frame", row.names = c(NA, -198L))


#returns a list with the aov by day in a list
cukebyday.aov <- dlply(cuke, .(Day), function(x) aov(Hue ~ Treatment, data=x))

#returns critical LSD Mean comparison 
l_ply(res.aov, function(x) LSD.test(x, "Treatment"))

#Need to get LSD values from l_ply or something

#computes mean and SEM for plotting
dataset.avg<-ddply(cuke, c("Treatment", "Day"), function(df) return(c(dataset.avg=mean(df[[3]],na.rm=TRUE), dataset.se=std.error(df[[3]]), na.rm=TRUE)))

#plots mean and SEM (want LSD)
avg.plot<-qplot(x=Day, dataset.avg, data=dataset.avg, geom=c("line","point"), ylab="Hue", main="Cuke Hue", color=Treatment)
avg.plot<- avg.plot+geom_errorbar(aes(ymax=dataset.avg+dataset.se, ymin=dataset.avg-dataset.se), width=0.5)+theme_bw()+opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))
avg.plot

Joe

unread,
Jul 14, 2010, 11:19:14 AM7/14/10
to ggplot2
I'm not sure exactly what this test is, but using ldply is probably
the first step you want.

lsd <- ldply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

and now, I'm guessing this is the plot you want, based on the LSD.test
man page.

p <- ggplot(lsd, aes(trt, means))+
geom_bar(aes(fill = M))+
facet_wrap(~Day)

There's some funkyness with the levels of M, so you might want to do
something like

levels(lsd$M) <- sub(" *", "", levels(lsd$M))

to clean that up.

Brandon Hurr

unread,
Jul 14, 2010, 1:18:44 PM7/14/10
to Joe, ggplot2
Joe, 

Thanks for the tips. What you have done is really interesting. Coloring by grouping isn't something I was considering since it's likely the publication will be in black and white, but I might throw it at the old advisor anyway for S's & giggles. 

What I'm actually trying to do is to graph the trend over time on a single graph with the four treatments, which is why I used geom_line and geom_point. (I really need to get away from using qplot.) Instead of SEM error bars, I'd like to use LSD or some other means separation test to make error bars for each day so that you can see if there is significance or not between treatments on a given day. Does that make sense? 

It looks like your "lsd" output has most all of the information I need to do that. However, if you read the l_ply() output the Least significant difference line is the number I'm interested in. 
Study:

LSD t Test for Hue 

Mean Square Error:  12.32855 

Treatment,  means and individual ( 95 %) CI

          Hue   std.err replication       LCL      UCL
Con  123.6993 0.2155098          14 123.26132 124.1373
ConM 117.4750 1.1348683          14 115.16867 119.7813
EthM 102.4141 1.5648717           9  99.23387 105.5943

alpha: 0.05 ; Df Error: 34
Critical Value of t: 2.032245 

Least Significant Difference 2.936135
Harmonic Mean of Cell Sizes  11.8125
Means with the same letter are not significantly different.

Groups, Treatments and means
a Con   123.6993 
 b ConM 117.475 
  c EthM 102.4141 

It's my first time trying to use the plyr functions, so I'm not sure I quite understand why the l_ply function and ldply function had such different outputs, and I'm still not quite sure how to get the LSD value out. 

I don't suppose there is a section in the ggplot2 book about the plyr functions? I just bought it and I'm looking forward to understanding more.

Thanks, 

Brandon 

###New code

lsd.l_ply<-l_ply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

#Returns data frame with mean, SEM, grouping factor, UCI, LCI
lsd <- ldply(cukebyday.aov, function(x) LSD.test(x, "Treatment"))

p <- ggplot(lsd, aes(Day, means))+
 geom_line(aes(color= trt))+
 geom_point(aes(shape = trt)) +
 geom_errorbar(aes(ymax=UCI, ymin=LCI, color=M), width=0.5)+
 opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))
 
p

##END NEW CODE

--
You received this message because you are subscribed to the ggplot2 mailing list.
Please provide a reproducible example: http://gist.github.com/270442

To post: email ggp...@googlegroups.com
To unsubscribe: email ggplot2+u...@googlegroups.com
More options: http://groups.google.com/group/ggplot2

Josef Fruehwald

unread,
Jul 14, 2010, 1:37:39 PM7/14/10
to Brandon Hurr, ggplot2
Brandon,

With the plyr functions, the first letter indicates the input data format, and the second letter indicates the output data format. l_ply() produces no output. The stuff it prints is just what LSD.test() prints when it's run, but apparently the object it produces isn't that.

With ldply(), you're saying, "Take every element of the input list, apply a function, coerce the output of that function into a data frame, then paste it all together." I just hoped that the output of LSD.test() would be nicely coercible into a dataframe, and it was.

As for your plot, I would not mix what the colors mean. In your plot, you use them to indicate both treatment and lsd group. I'd indicate lsd group with the point shape, then connect treatments with the same colored line, then add error bars in the same color as the treatment.

p <- ggplot(lsd, aes(Day, means))+
  geom_line(aes(colour = trt))+
  geom_errorbar(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5)+
  geom_point(aes(shape = M),size = 3)

Josef Fruehwald

unread,
Jul 14, 2010, 1:40:23 PM7/14/10
to Brandon Hurr, ggplot2
Also, geom_ribbon() creates more visual noise, but I sometimes prefer its appearance to errorbars.

ggplot(lsd, aes(Day, means))+
  geom_line(aes(colour = trt))+
  geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+

  geom_point(aes(shape = M),size = 3)

Brandon Hurr

unread,
Jul 14, 2010, 1:59:31 PM7/14/10
to Josef Fruehwald, ggplot2
I share your appreciation of geom_ribbon() as it's certainly easier to make out which is significantly different from what as long as you don't have too many treatments.  

The reason I did the colors for both was that if you had only: 

p <- ggplot(lsd, aes(Day, means))+
 geom_line()+
 geom_point(aes(shape = trt)) +
 geom_errorbar(aes(ymax=UCI, ymin=LCI, color=M), width=0.5)+
 opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))
 
p

It doesn't connect the lines by treatment. I couldn't figure out how to do it quickly so I just left it with extra colors to show what I was going for. 

Any idea how to pull out the LSD number I highlighted? It seems like it doesn't make it into the ldply() dataframe. I suppose an alternative would be to use geom_text() to put the grouping next to each point. 

> lsd
   Day  trt    means M         N   std.err       LCI      UCI
1    0 Con  124.4100 a  9.545455 0.5102194 123.37881 125.4412
2    0 ConM 123.9833 a  9.545455 0.3944649 123.18609 124.7806
3    0 EthM 123.9429 a  9.545455 0.6261360 122.67739 125.2083
4    0 Eth  123.7614 a  9.545455 0.9961818 121.74807 125.7748
5    3 Con  126.1767 a 15.000000 0.2380890 125.68896 126.6644
6    3 ConM 124.3447 b 15.000000 0.4596343 123.40315 125.2862
7    6 Con  124.0913 a  7.500000 0.3754716 123.32984 124.8528
8    6 Eth  122.8760 a  7.500000 0.9220336 121.00603 124.7460
9    6 ConM 122.4367 a  7.500000 0.5780366 121.26435 123.6090
10   6 EthM 113.9680 b  7.500000 1.9190659 110.07595 117.8600
11   9 Con  123.7727 a  6.315789 0.2136273 123.33852 124.2068
12   9 ConM 120.1807 b  6.315789 0.8909639 118.37001 121.9913
13   9 Eth  117.2700 b  6.315789 1.0551777 115.12562 119.4144
14   9 EthM 113.6500 c  6.315789 2.3294599 108.91597 118.3840
15  12 Con  123.6993 a 11.812500 0.2155098 123.26132 124.1373
16  12 ConM 117.4750 b 11.812500 1.1348683 115.16867 119.7813
17  12 EthM 102.4141 c 11.812500 1.5648717  99.23387 105.5943

Brandon Hurr

unread,
Jul 14, 2010, 2:11:46 PM7/14/10
to Josef Fruehwald, ggplot2
Joe, 

Thanks a ton. I'll keep plugging away and if I figure it out, post it here for posterity. 

Brandon

On Wed, Jul 14, 2010 at 19:09, Josef Fruehwald <jofr...@gmail.com> wrote:
I would avoid using geom_text() to annotate points. In the code I had, the grouping is indicated by the point shape. If you don't like that so much, you might try:

ggplot(lsd, aes(Day, means))+
  geom_line(aes(colour = trt))+
  geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+
  geom_text(aes(label = M))

But definitely don't use both point shape and group label.

As for getting the LSD number, I really have no idea. LSD.test() doesn't return an object which has that statistic in it, or extractable. Maybe you can calculate it from the output it does produce, but other than that I'm clueless.

-Joe

Josef Fruehwald

unread,
Jul 14, 2010, 2:09:59 PM7/14/10
to Brandon Hurr, ggplot2
I would avoid using geom_text() to annotate points. In the code I had, the grouping is indicated by the point shape. If you don't like that so much, you might try:

ggplot(lsd, aes(Day, means))+
  geom_line(aes(colour = trt))+
  geom_ribbon(aes(ymax=UCI, ymin=LCI, color=trt), width=0.5,alpha = 0.3)+
  geom_text(aes(label = M))

But definitely don't use both point shape and group label.

As for getting the LSD number, I really have no idea. LSD.test() doesn't return an object which has that statistic in it, or extractable. Maybe you can calculate it from the output it does produce, but other than that I'm clueless.

-Joe

Brandon Hurr

unread,
Jul 22, 2010, 12:09:48 PM7/22/10
to ggplot2
Dear all, 

For posterity's sake I thought I'd put this up here. I've got a graph pretty close to what I want, just a bit more tweaking. The important part was that I figured out how to get the LSD values out of LSD.test() from the Agricolae package. I hacked it to force the output of the LSD value and created a new function called LSD.test.test().  With clear separation annotating the grouping with geom-text works pretty well too, although with much more complex data the groupings become a bit too long (cdefg, cdefgh, etc...)

Many thanks to Joe Fruehwald and Mike Lawrence for all their help.   

Brandon

P.S. I think I've learned enough to be dangerous.  :O

###Begin R code

LSD.test.test<-function (y, trt, DFerror, MSerror, alpha = 0.05, p.adj = c("none", 
    "holm", "hochberg", "bonferroni", "BH", "BY", "fdr"), group = TRUE, 
    main = NULL) 
{
    p.adj <- match.arg(p.adj)
    clase <- c("aov", "lm")
    name.y <- paste(deparse(substitute(y)))
    name.t <- paste(deparse(substitute(trt)))
    if ("aov" %in% class(y) | "lm" %in% class(y)) {
        A <- y$model
        DFerror <- df.residual(y)
        MSerror <- deviance(y)/DFerror
        y <- A[, 1]
        ipch <- pmatch(trt, names(A))
        if (is.na(ipch)) 
            return(cat("Name: ", trt, "\n", names(A)[-1], "\n"))
        name.t <- names(A)[ipch]
        trt <- A[, ipch]
        name.y <- names(A)[1]
    }
    junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
    means <- tapply.stat(junto[, 1], junto[, 2], stat = "mean")
    sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")
    nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")
    std.err <- sds[, 2]/sqrt(nn[, 2])
    Tprob <- qt(1 - alpha/2, DFerror)
    LCL <- means[, 2] - Tprob * std.err
    UCL <- means[, 2] + Tprob * std.err
    means <- data.frame(means, std.err, replication = nn[, 2], 
        LCL, UCL)
    names(means)[1:2] <- c(name.t, name.y)
    ntr <- nrow(means)
    nk <- choose(ntr, 2)
    if (p.adj != "none") {
        a <- 1e-06
        b <- 1
        for (i in 1:100) {
            x <- (b + a)/2
            d <- p.adjust(x, n = nk, p.adj) - alpha
            fa <- p.adjust(a, n = nk, p.adj) - alpha
            if (d * fa < 0) 
                b <- x
            if (d * fa > 0) 
                a <- x
        }
        Tprob <- qt(1 - x/2, DFerror)
    }
    nr <- unique(nn[, 2])
    cat("\nStudy:", main)
    cat("\n\nLSD t Test for", name.y, "\n")
    if (p.adj != "none") 
        cat("P value adjustment method:", p.adj, "\n")
    cat("\nMean Square Error: ", MSerror, "\n\n")
    cat(paste(name.t, ",", sep = ""), " means and individual (", 
        (1 - alpha) * 100, "%) CI\n\n")
    print(data.frame(row.names = means[, 1], means[, -1]))
    cat("\nalpha:", alpha, "; Df Error:", DFerror)
    cat("\nCritical Value of t:", Tprob, "\n")
    if (group) {
        if (length(nr) == 1) {
            LSD <- Tprob * sqrt(2 * MSerror/nr)
            cat("\nLeast Significant Difference", LSD)
        }
        else {
            nr1 <- 1/mean(1/nn[, 2])
            LSD <- Tprob * sqrt(2 * MSerror/nr1)
            cat("\nLeast Significant Difference", LSD)
            cat("\nHarmonic Mean of Cell Sizes ", nr1)
        }
        cat("\nMeans with the same letter are not significantly different.")
        cat("\n\nGroups, Treatments and means\n")
        output <- order.group(means[, 1], means[, 2], means[, 
            4], MSerror, Tprob, means[, 3])
        w <- order(means[, 2], decreasing = TRUE)
        output <- data.frame(output, LCI = means[w, 5], UCI = means[w, 
            6], LSD)
    }
    if (!group) {
        comb <- combn(ntr, 2)
        nn <- ncol(comb)
        dif <- rep(0, nn)
        LCL1 <- dif
        UCL1 <- dif
        sig <- NULL
        pvalue <- rep(0, nn)
        for (k in 1:nn) {
            i <- comb[1, k]
            j <- comb[2, k]
            if (means[i, 2] < means[j, 2]) {
                comb[1, k] <- j
                comb[2, k] <- i
            }
            dif[k] <- abs(means[i, 2] - means[j, 2])
            sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j, 
                4]))
            pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror))
            if (p.adj != "none") 
                pvalue[k] <- p.adjust(pvalue[k], n = nk, p.adj)
            pvalue[k] <- round(pvalue[k], 6)
            LCL1[k] <- dif[k] - Tprob * sdtdif
            UCL1[k] <- dif[k] + Tprob * sdtdif
            sig[k] <- " "
            if (pvalue[k] <= 0.001) 
                sig[k] <- "***"
            else if (pvalue[k] <= 0.01) 
                sig[k] <- "**"
            else if (pvalue[k] <= 0.05) 
                sig[k] <- "*"
            else if (pvalue[k] <= 0.1) 
                sig[k] <- "."
        }
        tr.i <- means[comb[1, ], 1]
        tr.j <- means[comb[2, ], 1]
        output <- data.frame(Difference = dif, pvalue = pvalue, 
            sig, LCL = LCL1, UCL = UCL1)
        rownames(output) <- paste(tr.i, tr.j, sep = " - ")
        cat("\nComparison between treatments means\n\n")
        print(output)
        output <- data.frame(trt = means[, 1], means = means[, 
            2], M = "", N = means[, 4], std.err, LCL, UCL)
    }
    invisible(output)
}
#Returns data frame with mean, SEM, grouping factor, UCI, LCI
lsd <- ldply(cukebyday.aov, function(x) LSD.test.test(x, "Treatment"))

p <- ggplot(lsd, aes(Day, means, group=trt))+
geom_line(aes(linetype=trt))+
geom_point(aes(shape = trt), size=4) +
geom_errorbar(aes(ymax=means+LSD, ymin=means-LSD), width=0.5)+
opts(axis.text.x=theme_text(angle=-45, hjust=0, size=9))+
geom_text(aes(x=Day, y=means, label=M, hjust=2, vjust=2))+
theme_bw()

p
Reply all
Reply to author
Forward
0 new messages