My problem is that I would like to merge multiple files with a common
column but merge accepts only two
data.frames to merge. In the real situation, I have 26 different
data.frames with a common column. I can of course use merge many times
(see below) but what would be more sophisticated solution? For loop?
Any ideas?
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], b = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], c = rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], d = rnorm(5))
g <- merge(DF1, DF2, by.x="var1", by.y="var1", all=T)
g <- merge(g, DF3, by.x="var1", by.y="var1", all=T)
merge(g, DF4, by.x="var1", by.y="var1", all=T)
Thanks in advance.
-Lauri
______________________________________________
R-h...@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
DF <- DF1
for ( .df in list(DF2,DF3,DF4) ) {
DF<<-merge(DF,.df,by.x="var1", by.y="var1", all=T)
}
print(DF)
I think Reduce could help you.
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], b = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], c = rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], d = rnorm(5))
g <- merge(DF1, DF2, by.x="var1", by.y="var1", all=T)
g <- merge(g, DF3, by.x="var1", by.y="var1", all=T)
g <- merge(g, DF4, by.x="var1", by.y="var1", all=T)
test <- Reduce(function(x, y) merge(x, y, all=T,by.x="var1",
by.y="var1"),
list(DF1, DF2, DF3, DF4), accumulate=F)
all.equal(test, g) # TRUE
As a warning, it's the first time I've ever used it myself...
Hope this helps,
baptiste
_____________________________
Baptiste Auguié
School of Physics
University of Exeter
Stocker Road,
Exeter, Devon,
EX4 4QL, UK
Phone: +44 1392 264187
http://newton.ex.ac.uk/research/emag
Error in match.names(clabs, names(xi)) :
names do not match previous names
I refined this example data to look more like my real data, this also
produces the same error. Any ideas how to prevent this error?
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
> g <- merge(DF1, DF2, by.x="var1", by.y="var1", all=T)
> g <- merge(g, DF3, by.x="var1", by.y="var1", all=T)
> merge(g, DF4, by.x="var1", by.y="var1", all=T)
Error in match.names(clabs, names(xi)) :
names do not match previous names
> DF <- DF1
> for ( .df in list(DF2,DF3,DF4) ) {
+ DF <-merge(DF,.df,by.x="var1", by.y="var1", all=T)
+ }
Error in match.names(clabs, names(xi)) :
names do not match previous names
> Reduce(function(x, y) merge(x, y, all=T,by.x="var1", by.y="var1"), list(DF1, DF2, DF3, DF4), accumulate=F)
Error in match.names(clabs, names(xi)) :
names do not match previous names
- Lauri
2009/2/19 baptiste auguie <ba...@exeter.ac.uk>:
merge.rec <- function(.list, ...){
if(length(.list)==1) return(.list[[1]])
Recall(c(list(merge(.list[[1]], .list[[2]], ...)), .list[-(1:2)]), ...)
}
my.list <- list(DF1, DF2, DF3, DF4)
test2 <- merge.rec(my.list, by.x="var1", by.y="var1", all=T)
all.equal(test2, g)
Note that your second example does not work because in the last step
there are no common names between g and DF4 (I think). Using
suffixes=c("", "") seems to do the trick but I'm not sure it's giving
the result you want/expect.
Hope this helps,
baptiste
On 19 Feb 2009, at 10:21, Lauri Nikkinen wrote:
_____________________________
Baptiste Auguié
School of Physics
University of Exeter
Stocker Road,
Exeter, Devon,
EX4 4QL, UK
Phone: +44 1392 264187
http://newton.ex.ac.uk/research/emag
______________________________________________
http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:merge
It would be very nice if a R guru could check that the information I
put is not complete fantasy. Feel free to remove as appropriate.
Best wishes,
baptiste
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF <- DF1
for ( .df in list(DF2,DF3,DF4) ) {
DF <-merge(DF,.df,by.x="var1", by.y="var1", all=T)
names(DF)[-1] <- paste(names(DF)[-1], 2:length(names(DF)))
}
names(DF) <- sub("[[:space:]].+$", "", names(DF), perl=T)
DF
Thank you all!
-Lauri
2009/2/19 baptiste auguie <ba...@exeter.ac.uk>:
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF <- DF1
for ( .df in list(DF2,DF3,DF4) ) {
DF <-merge(DF,.df,by.x="var1", by.y="var1", all=T, suffixes=c("", ""))
}
DF
-Lauri
2009/2/19 Lauri Nikkinen <lauri.n...@iki.fi>:
set.seed(1)
DF1 <- data.frame(var1 = letters[1:5], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF2 <- data.frame(var1 = letters[3:7], a = rnorm(5), b = rnorm(5), c = rnorm(5))
DF3 <- data.frame(var1 = letters[6:10], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
DF4 <- data.frame(var1 = letters[8:12], a = rnorm(5), b = rnorm(5), c
= rnorm(5))
# create list of data frames
DFs <- list(A = DF1, B = DF2, C = DF3, D = DF4)
library(zoo)
# convert to list of zoo objects
z <- lapply(DFs, function(x) zoo(as.matrix(x[ ,-1, drop = FALSE]),
as.character(x[,1])))
# perform merge
zz <- do.call(merge, z)
# to convert back to data frame
DF <- as.data.frame(var1 = time(zz), coredata(zz))