#load tidyverse for all things good
library(tidyverse)
#load viridis for nice color scales
library(viridis)
# generate data
set.seed(1) #for reproducibility
nX = 100 #num obs on x
nY = 1e3 #num samples at each x
dat = tibble(
x = rep(1:nX,each=nY)
, sample = rep(1:nY,times=nX)
, y = rnorm(nX*nY) # random for example
)
#chain to add rank & dist data to each y at each x
dat %>%
dplyr::group_by(
x
) %>%
dplyr::arrange(
x
, y
) %>%
dplyr::mutate(
# rank: rank of y observation
rank = 1:n()
# dist: rank's distance from median rank
, dist = (abs(rank-median(rank))+.5)*2
# dist01: dist as % of max distance
, dist01 = (dist-1)/(n()-1)*100
) -> dat #just overwrite dat since we only added columns
#get unique distances
dist_list = rev(sort(unique(dat$dist)))
#toss the distance = 0 (only present if nY is odd)
dist_list = dist_list[dist_list>0]
# if nY is really big, the following for loop can hit stack limits, so use
# num_dists_to_vis to visualize a subset of the distances
num_dists_to_vis = 100
num_dists_to_vis = min(length(dist_list),num_dists_to_vis)
dist_list = dist_list[
round(
seq.int(1,length(dist_list),length.out=num_dists_to_vis))
]
#start an empty plot that we'll add to in a loop
p = ggplot()
#loop over distances in dist_list
for(i in dist_list){
dat %>%
#filter to the distance for this loop
dplyr::filter(
dist==i
) %>%
#grab the upper and lower bounds, & distances
# note: dat is still grouped by x from above
dplyr::summarise(
lo = y[1]
, hi = y[2]
, dist = dist[1]
, dist01 = dist01[1]
) -> temp #assign to temporary obj
#add a ribbon for this distance
# note: using the dist01 for fill & colour
p = p + geom_ribbon(
data = temp
, mapping = aes(
x = x
, ymin = lo
, ymax = hi
, fill = dist01
, colour = dist01
)
)
}
#add a line for the upper & lower bound of the 100%ile interval
p = p + geom_line(
data = dat[dat$dist==max(dat$dist),]
, mapping = aes(
x = x
, y = y
, group = rank
)
)
#add color & fill scales
p = p +
scale_fill_viridis(
name = '%ile'
, direction = -1
, option = 'plasma'
)+
scale_color_viridis(
name = '%ile'
, direction = -1
, option = 'plasma'
)
# since the ribbons overlap on eachother, we can't use alpha to show the
# panel grid lines, so we need to grab them and use geom_vline & geom_hline
b = ggplot_build(p)
x_major = b$layout$panel_ranges[[1]]$x.major_source
x_minor = b$layout$panel_ranges[[1]]$x.minor_source
y_major = b$layout$panel_ranges[[1]]$y.major_source
y_minor = b$layout$panel_ranges[[1]]$y.minor_source
#add the grid lines
p = p +
geom_vline(
xintercept = x_major
, size = 1
, colour = 'white'
, alpha = .1
) +
geom_vline(
xintercept = x_minor
, size = .5
, colour = 'white'
, alpha = .1
) +
geom_hline(
yintercept = y_major
, size = 1
, colour = 'white'
, alpha = .1
) +
geom_hline(
yintercept = y_minor
, size = .5
, colour = 'white'
, alpha = .1
)
#remove the original grid lines and fill the background with
# the 100% color for aesthetics
p = p + theme(
panel.background = element_rect(
fill = viridis(1,option='plasma')
)
, panel.grid = element_blank()
)
#constrain the plot to only show the domain of the data
p = p +
scale_y_continuous(
expand = c(0,0)
) +
scale_x_continuous(
expand = c(0,0)
)
#done!
print(p)