summarize(condsum = sum(v1)) %>%
melt(value.name='condsum', id.vars=c('group', 'c1')) %>% dcast(group ~ c1, value.var='condsum')
}
## let's do some benchmarking
library(microbenchmark)
## Small reproducible example data: N rows with a grouping column drawn from
## letters a-d and two (condition, value) pairs, where each condition is drawn
## from letters t-v and each value is a single Bernoulli(0.5) draw (0 or 1).
## The seed is fixed so repeated runs produce the same data frame.
set.seed(1)
N <- 30
## Spell out TRUE: `T` is an ordinary variable that can be reassigned.
df <- data.frame(
  group = sample(letters[1:4], N, replace = TRUE),
  c1 = sample(letters[20:22], N, replace = TRUE),
  v1 = rbinom(N, 1, 0.5),
  c2 = sample(letters[20:22], N, replace = TRUE),
  v2 = rbinom(N, 1, 0.5)
)
## The tidyr solution (Hadley2) throws an error, so it is left out of the benchmark :(
##> Hadley2(df)
##Error: index out of bounds
## Time each candidate implementation on the small data frame,
## evaluating every expression 100 times.
microbenchmark(
  Robbie(df),
  Hadley1(df),
  Jaime(df),
  times = 100
)
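## NOTE: the output below was recorded with 1000 evaluations per expression
## (see the neval column), not the times=100 used in the call above.
## Unit: milliseconds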
## expr min lq mean median uq max neval
## Robbie(df) 2.149660 2.405781 2.740472 2.571891 2.821945 11.86290 1000
## Hadley1(df) 1.851988 2.076746 2.410784 2.221399 2.452184 27.91905 1000
## Jaime(df) 6.018156 6.547313 7.356067 6.890498 7.415158 18.21118 1000
## Now repeat on a much larger (but still in-memory) example dataset.
## Note: these benchmarks take a while to run!
## Large example data: same column layout as the small example (a grouping
## column drawn from letters a-d and two condition/value pairs), but with
## three million rows. The seed is fixed so repeated runs produce the same data.
set.seed(1)
N <- 3e6
## Spell out TRUE: `T` is an ordinary variable that can be reassigned.
df <- data.frame(
  group = sample(letters[1:4], N, replace = TRUE),
  c1 = sample(letters[20:22], N, replace = TRUE),
  v1 = rbinom(N, 1, 0.5),
  c2 = sample(letters[20:22], N, replace = TRUE),
  v2 = rbinom(N, 1, 0.5)
)
## Time each candidate implementation on the large data frame,
## evaluating every expression 50 times.
microbenchmark(
  Robbie(df),
  Hadley1(df),
  Jaime(df),
  times = 50
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## Robbie(df) 5709.4797 5791.7142 5903.6788 5865.5936 5966.040 6319.9855 50
## Hadley1(df) 1762.8244 1828.1561 1875.0274 1863.7192 1906.411 2122.2618 50
## Jaime(df) 219.5084 225.2723 244.2125 229.4026 242.537 458.7869 50