Re: forest plots in ggplot2: uneven scaling causing skewed/misplaced error bars

147 views
Skip to first unread message

catherin...@gmail.com

unread,
May 16, 2013, 7:32:41 PM5/16/13
to ggp...@googlegroups.com
Apologies, there appears to be some confusion about the dataset. The data frame creates erroneous factors - as a matrix (yes, a character matrix, but it does the job) the data is below.  I keep running into problems when I then try to put this data into a data frame and run the forest plot code:

# Character matrix with 8 rows (studies) and 7 columns (see .Dim / .Dimnames).
# Every cell is a quoted string, including the numeric-looking g, gMax, gMin
# and se.g values; some study_sizes entries carry a leading space (" 8", " 6").
# NOTE: the column name "study_names" appears twice in .Dimnames (columns 1 and 6).
structure(c("GSE19615_GPL570_g", "GSE19697_GPL570_g", "GSE25055_GPL96_MDACC_M_g", 
"GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 
"GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g", "0.869972470611074", 
"0.326749295517069", "0.384344126803607", "0.843678380516048", 
"0.362583650763205", "1.1354277895057", "0.144218821146223", 
"0.464106969261813", "1.33093884313621", "0.662599776651844", 
"0.545133826525001", "1.38928393299835", "0.651274402039127", 
"1.86443935545432", "0.60375213550857", "0.761444478792686", 
"0.409006098085934", "-0.00910118561770601", "0.223554427082212", 
"0.298072828033745", "0.0738928994872826", "0.406416223557072", 
"-0.315314493216125", "0.16676945973094", "0.46096637252514", 
"0.335850481134775", "0.160789699721395", "0.545605552482303", 
"0.288690751275922", "0.729011565948626", "0.459533314362348", 
"0.297337509530873", "GSE19615_GPL570_g", "GSE19697_GPL570_g", 
"GSE25055_GPL96_MDACC_M_g", "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", 
"GSE25065_GPL96_MDACC_MDA_g", "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g", 
"20", "24", "42", "99", " 8", "28", " 6", "10"), .Dim = c(8L, 
7L), .Dimnames = list(c("GSE19615_GPL570_g", "GSE19697_GPL570_g", 
"GSE25055_GPL96_MDACC_M_g", "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", 
"GSE25065_GPL96_MDACC_MDA_g", "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"
), c("study_names", "g", "gMax", "gMin", "se.g", "study_names", 
"study_sizes")))



On Thursday, May 16, 2013 11:40:38 AM UTC-7, catherin...@gmail.com wrote:

Hi ggplot group,


I've been trying to fix a bug in my forest plots for a few weeks now (officially driving me crazy). 


The problem is that the axes do not pop out uniformly scaled. Seems like multiple things are going on here, even though the plot only needs 1 line of code from the R-bloggers tutorial: http://www.r-bloggers.com/forest-plots-using-r-and-ggplot2/  


The bars are oftentimes placed with flipped signs so that they don't fall on the mean value. Even the zero hline() isn't on the right scale.  I've tried using geom_errorbar() instead and scaling options, but I'm throwing in the towel. Does anyone have any suggestions? I think something's up with my data frame. Code is below.


Best,

Katie

___

Katie Planey

Stanford Biomedical Informatics

Butte Lab | http://buttelab.stanford.edu/




  p <-ggplot(data, aes(x=study_names,y=g,ymin=gMin,ymax=gMax)) +geom_pointrange() + geom_hline(aes(x=0), lty=2) + coord_flip()

 

plot(p)



#My dataset is below - it's effect sizes (g) and max/min (g ± standard deviation) for a meta-analysis of microarray data from different studies. 

#I just created a data frame with the relevant statistics:

# Data frame literal: 9 studies (rows) x 7 columns.
# NOTE: g, gMax, gMin and se.g are stored as factors (class = "factor"); the
# numeric values live in the .Label strings and the integer codes only index
# into those labels -- they are not the values themselves.
# NOTE: the column name "study_names" appears twice in .Names: first as a
# factor, then again as a plain character vector.
data <- structure(list(study_names = structure(1:9, .Names = c("GSE19615_GPL570_g", 

"GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

"GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

"GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), .Label = c("GSE19615_GPL570_g", 

"GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

"GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

"GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), class = "factor"), 

    g = structure(c(1L, 8L, 6L, 3L, 2L, 4L, 9L, 7L, 5L), .Names = c("GSE19615_GPL570_g", 

    "GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

    "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

    "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), .Label = c("-0.0880224564200254", 

    "-0.293760093370228", "-0.297381677328331", "-0.303845191315", 

    "-0.307510645327556", "-0.426248689072173", "-0.590197752144345", 

    "-0.60072930481096", "-1.13419569672925"), class = "factor"), 

    gMax = structure(c(9L, 6L, 3L, 4L, 8L, 2L, 7L, 5L, 1L), .Names = c("GSE19615_GPL570_g", 

    "GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

    "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

    "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), .Label = c("-0.0644600967202706", 

    "-0.0646476363654359", "-0.10990976467328", "-0.166274833020161", 

    "-0.203680097076338", "-0.321207868946344", "-0.539150356441367", 

    "0.217241161839879", "0.289255627935715"), class = "factor"), 

    gMin = structure(c(2L, 7L, 5L, 1L, 6L, 3L, 9L, 8L, 4L), .Names = c("GSE19615_GPL570_g", 

    "GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

    "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

    "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), .Label = c("-0.428488521636501", 

    "-0.465300540775766", "-0.543042746264564", "-0.550561193934842", 

    "-0.742587613471065", "-0.804761348580336", "-0.880250740675576", 

    "-0.976715407212353", "-1.72924103701714"), class = "factor"), 

    se.g = structure(c(6L, 4L, 5L, 1L, 8L, 2L, 9L, 7L, 3L), .Names = c("GSE19615_GPL570_g", 

    "GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

    "GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

    "GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), .Label = c("0.13110684430817", 

    "0.239197554949564", "0.243050548607286", "0.279521435864616", 

    "0.316338924398893", "0.377278084355741", "0.386517655068008", 

    "0.511001255210108", "0.595045340287887"), class = "factor"), 

    study_names = c("GSE19615_GPL570_g", "GSE19697_GPL570_g", 

    "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", "GSE25065_GPL96_LBJ_g", 

    "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", "GSE25065_GPL96_PERU_g", 

    "GSE25065_GPL96_USO_g"), study_sizes = c(20, 24, 42, 99, 

    8, 28, 6, 10, 6)), .Names = c("study_names", "g", "gMax", 

"gMin", "se.g", "study_names", "study_sizes"), row.names = c("GSE19615_GPL570_g", 

"GSE19697_GPL570_g", "GSE22226_GPL1708_g", "GSE25055_GPL96_MDACC_M_g", 

"GSE25065_GPL96_LBJ_g", "GSE25065_GPL96_MDACC_g", "GSE25065_GPL96_MDACC_MDA_g", 

"GSE25065_GPL96_PERU_g", "GSE25065_GPL96_USO_g"), class = "data.frame")

Ito, Kaori (Groton)

unread,
May 16, 2013, 8:28:01 PM5/16/13
to catherin...@gmail.com, ggp...@googlegroups.com

Is this something you want?

(I just changed the data type as below, not the code)

 

# Build a data frame from `d`. NOTE(review): `d` is not defined in this
# message -- presumably the dataset posted earlier in the thread; confirm.
Data <- data.frame(d)

# Inspect the column types before converting.
str(Data)

# Convert the plotted columns to numeric via as.character(): calling
# as.numeric() directly on a factor returns its integer level codes rather
# than the values printed in the labels.
Data$g <- as.numeric(as.character(Data$g))
Data$gMin <- as.numeric(as.character(Data$gMin))
Data$gMax <- as.numeric(as.character(Data$gMax))

# Forest plot: one point range per study (g spanning gMin..gMax), flipped so
# the studies run down the vertical axis.
p <- ggplot(Data, aes(x = study_names, y = g, ymin = gMin, ymax = gMax)) +
  geom_pointrange() +
  # Dashed zero-effect reference line; geom_hline() is positioned with
  # `yintercept` (it has no x aesthetic).
  geom_hline(yintercept = 0, linetype = 2) +
  coord_flip()

 

--
--
You received this message because you are subscribed to the ggplot2 mailing list.
Please provide a reproducible example: https://github.com/hadley/devtools/wiki/Reproducibility
 
To post: email ggp...@googlegroups.com
To unsubscribe: email ggplot2+u...@googlegroups.com
More options: http://groups.google.com/group/ggplot2
 
---
You received this message because you are subscribed to the Google Groups "ggplot2" group.
To unsubscribe from this group and stop receiving emails from it, send an email to ggplot2+u...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.
 
 

catherin...@gmail.com

unread,
May 16, 2013, 10:56:17 PM5/16/13
to ggp...@googlegroups.com

YES! Thank you - I had tried as.numeric() alone and it gave unintelligible answers.  Slightly embarrassed, given that I've been working in R for a while now... mysterious data-type issues always get the best of me.

Reply all
Reply to author
Forward
0 new messages