> last_file_date <- max(input_files$as_at_date) # latest as-at date found in input_files; used below to check each entity's last date
>
> dim(d) # rows and columns of the data frame used in every timing run below
[1] 170165 74
>
> system.time({ # timing run: sort and group only, no derived columns
+ d %>%
+ ungroup() %>% # remove any grouping already present on d
+ arrange_(~ id, ~ as_at_date) %>% # order rows by id, then by as-at date within each id
+ group_by_(~ id) -> d2 # one group per id; the grouped result is right-assigned to d2
+ })
user system elapsed
0.817 0.000 0.818
>
> system.time({ # timing run: sort, group, and add per-id first/last/count columns
+ d %>%
+ ungroup() %>% # remove any grouping already present on d
+ arrange_(~ id, ~ as_at_date) %>% # order rows by id, then by as-at date within each id
+ group_by_(~ id) %>% # one group per id
+ mutate_(
+ first_date = ~ first(as_at_date), # first as-at date in the group (earliest, given the sort above)
+ last_date = ~ last(as_at_date), # last as-at date in the group (latest, given the sort above)
+ last_date_ok = ~ last_date == last_file_date, # TRUE when the group's last date equals the global last_file_date
+ n_dates = ~ n() # number of rows in the group
+ ) -> d2 # result is right-assigned to d2
+ })
user system elapsed
1.798 0.000 1.799
>
> system.time({ # timing run: as above plus is_consecutive, with months(6) written inline
+ d %>%
+ ungroup() %>% # remove any grouping already present on d
+ arrange_(~ id, ~ as_at_date) %>% # order rows by id, then by as-at date within each id
+ group_by_(~ id) %>% # one group per id
+ mutate_(
+ first_date = ~ first(as_at_date), # first as-at date in the group (earliest, given the sort above)
+ last_date = ~ last(as_at_date), # last as-at date in the group (latest, given the sort above)
+ last_date_ok = ~ last_date == last_file_date, # TRUE when the group's last date equals the global last_file_date
+ n_dates = ~ n(), # number of rows in the group
+ is_consecutive = ~ (as_at_date == first_date) | # TRUE on the group's first date, or ...
+ (as_at_date == (lag(as_at_date) + months(6))) # ... when this date equals the previous row's date plus months(6). NOTE(review): months() is presumably lubridate's period constructor, called inside the grouped expression; that is the only change from the faster run recorded above - confirm it is the cause of the slowdown
+ ) -> d2 # result is right-assigned to d2
+ })
user system elapsed
81.546 0.191 81.806
>
> system.time({ # timing run: same columns, but months(6) is built once before the pipeline
+ months_6 <- months(6) # computed a single time, outside the grouped mutate_
+ d %>%
+ ungroup() %>% # remove any grouping already present on d
+ arrange_(~ id, ~ as_at_date) %>% # order rows by id, then by as-at date within each id
+ group_by_(~ id) %>% # one group per id
+ mutate_(
+ first_date = ~ first(as_at_date), # first as-at date in the group (earliest, given the sort above)
+ last_date = ~ last(as_at_date), # last as-at date in the group (latest, given the sort above)
+ last_date_ok = ~ last_date == last_file_date, # TRUE when the group's last date equals the global last_file_date
+ n_dates = ~ n(), # number of rows in the group
+ is_consecutive = ~ (as_at_date == first_date) | # TRUE on the group's first date, or ...
+ (as_at_date == (lag(as_at_date) + months_6)) # ... when this date equals the previous row's date plus the precomputed months_6
+ ) -> d2 # result is right-assigned to d2
+ })
user system elapsed
19.483 0.187 19.691
>
> system.time({ # timing run: same columns, but the offset is the plain number 180 instead of a months(6) value
+ months_6 <- months(6) # NOTE(review): assigned but not referenced anywhere in this run
+ d %>%
+ ungroup() %>% # remove any grouping already present on d
+ arrange_(~ id, ~ as_at_date) %>% # order rows by id, then by as-at date within each id
+ group_by_(~ id) %>% # one group per id
+ mutate_(
+ first_date = ~ first(as_at_date), # first as-at date in the group (earliest, given the sort above)
+ last_date = ~ last(as_at_date), # last as-at date in the group (latest, given the sort above)
+ last_date_ok = ~ last_date == last_file_date, # TRUE when the group's last date equals the global last_file_date
+ n_dates = ~ n(), # number of rows in the group
+ is_consecutive = ~ (as_at_date == first_date) | # TRUE on the group's first date, or ...
+ (as_at_date == (lag(as_at_date) + 180)) # ... when this date equals the previous row's date plus 180. NOTE(review): not the same test as + months(6) - on a Date this adds 180 days (six calendar months is roughly 181-184 days), on a POSIXct it would add 180 seconds; confirm the column type and whether is_consecutive still matches the earlier runs before adopting this variant for its speed
+ ) -> d2 # result is right-assigned to d2
+ })
user system elapsed
3.764 0.004 3.772
>