---
title: The Availability of Research Data Declines Rapidly with Article Age
bibliography: mybibfile.bib
preamble: >
\usepackage{graphix}
output: html_document
runtime: shiny
---
```{r, echo=FALSE}
library(shiny)
```
text1
```{r echo=FALSE}
fileInput('file1', 'Choose your own CSV File instead of provided data',accept=c('text/csv', 'text/comma-separated-values,text/plain', '.csv'))
go1 <- reactive({
dpath <- "CurrentBiologyData.txt"
if(!is.null(input$file1)){
dpath <- input$file1$datapath
}
read.table(dpath, header = TRUE) #choose 'CurrentBiologyData.txt'
})
renderPlot({
CB.dat<-go1()
dim(CB.dat)
names(CB.dat)
CB.dat$age <- 2013 - CB.dat$year
summary(factor(CB.dat$no_emails_worked))
CB.dat$email <- replace(CB.dat$no_emails_worked, which(CB.dat$no_emails_worked == 0), "yes")
CB.dat$email <- replace(CB.dat$email, which(CB.dat$no_emails_worked == 1), "no")
CB.dat$email <- factor(CB.dat$email)
summary(factor(CB.dat$email))
385/(131+385)
em1 <- glm(email ~ age, data = CB.dat, family = "binomial")
summary(em1)
exp(cbind(em1$coef, confint(em1)))
drop1(em1, test = "Chi")
newdata.em <- data.frame(age = seq(2, 22, 0.1))
newdata.em$phat <- predict(em1, newdata.em, type = "response")
newdata.em <- cbind(newdata.em, predict(em1, newdata.em, type = "response", se.fit = TRUE))
value<-1.96
newdata.em <- within(newdata.em, {
LL <- fit - value*se.fit
UL <- fit + value*se.fit
})
props.em <- tapply(CB.dat$no_emails_worked, factor(CB.dat$age), mean)
pred.props.em <- data.frame(ages = seq(2, 22, 2), props.em = props.em)
CB.through <- CB.dat[CB.dat$email == "yes", ]
CB.through$response <- CB.through$no_response
CB.through$response <- replace(CB.through$response, which(CB.through$response == 1), "no")
CB.through$response <- replace(CB.through$response, which(CB.through$response == 0), "yes")
CB.through$response <- factor(CB.through$response)
summary(CB.through$response)
re1 <- glm(response ~ age, data = CB.through, family = "binomial")
summary(re1)
exp(cbind(re1$coef, confint(re1)))
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props.re <- tapply(CB.through$no_response, factor(CB.through$age), mean) CB.use <- CB.through[which(CB.through$response == "yes"), ]
CB.use$response_no_help
CB.use$help <- CB.use$response_no_help
CB.use$help <- replace(CB.use$help, which(CB.use$help == 1), "no")
CB.use$help <- replace(CB.use$help, which(CB.use$help == 0), "yes")
CB.use$help <- factor(CB.use$help)
summary(CB.use$help)
h1 <- glm(help ~ age, data = CB.use, family = "binomial")
summary(h1)
exp(cbind(h1$coef, confint(h1)))
newdata.h <- data.frame(age = seq(2, 22, 0.1))
newdata.h$phat <- predict(h1, newdata.h, type = "response")
newdata.h <- cbind(newdata.h, predict(h1, newdata.h, type = "response", se.fit = TRUE))
newdata.h <- within(newdata.h, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props.h <- tapply(CB.use$response_no_help, factor(CB.use$age), mean)
pred.props.h <- data.frame(ages = seq(2, 22, 2), props.h = props.h)
CB.extant <- CB.use[which(CB.use$help == "yes"), ]
names(CB.extant)
summary(factor(CB.extant$data_received))
summary(factor(CB.extant$cant_share))
CB.extant$alive <- rowSums(cbind(CB.extant$data_received, CB.extant$cant_share))
summary(factor(CB.extant$alive))
al1 <- glm(alive ~ age, data = CB.extant, family = "binomial")
summary(al1)
exp(cbind(al1$coef, confint(al1)))
newdata.a <- data.frame(age = seq(2, 22, 0.1))
newdata.a$phat <- predict(al1, newdata.a, type = "response")
newdata.a <- cbind(newdata.a, predict(al1, newdata.a, type = "response", se.fit = TRUE))
newdata.a <- within(newdata.a, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props.a <- tapply(CB.extant$alive, factor(CB.extant$age), mean)
pred.props.a <- data.frame(ages = seq(2, 22, 2), props.a = props.a)
library(ggplot2)
p1 <<- ggplot(newdata.em, aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 1) +
labs(x = "age of paper (years)", y = "P(email got through)", title = "A") +
geom_point(aes(x = ages, y = 1-props.em), data = pred.props.em, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
p2 <<- ggplot(newdata.re, aes(x = age, y = fit)) + geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 1) +
labs(x = "age of paper (years)", y = "P(response|email got through)", title = "B") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
p3 <<- ggplot(newdata.h, aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 1) +
labs(x = "age of paper (years)", y = "P(useful response|response)", title = "C") +
geom_point(aes(x = ages, y = 1-props.h), data = pred.props.h, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
p4 <<- ggplot(newdata.a, aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 1) +
labs(x = "age of paper (years)", y = "P(data extant|useful response)", title = "D") +
geom_point(aes(x = ages, y = props.a), data = pred.props.a, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
require(grid)
plots <- c(list(...), plotlist)
numPlots = length(plots)
if (is.null(layout)) {
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
multiplot(p1, p3, p2, p4, cols = 2)
})
```
text2
```{r echo=FALSE}
renderPlot({
CB.dat$alive_all_papers <- rowSums(cbind(CB.dat$cant_share, CB.dat$data_received))
l1 <- glm(alive_all_papers ~ age, data = CB.dat, family = "binomial")
summary(l1)
newdata <- data.frame(age = seq(2, 22, 0.1))
newdata$phat <- predict(l1, newdata, type = "response")
newdata <- cbind(newdata, predict(l1, newdata, type = "response", se.fit = TRUE))
newdata <- within(newdata, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props <- tapply(CB.dat$alive_all_papers, factor(CB.dat$age), mean)
pred.props <- data.frame(ages = seq(2, 22, 2), props = props)
total_emails<-CB.dat$number_emails_paper+CB.dat$number_emails_web
ew1 <- glm(number_emails_paper ~ age, data = CB.dat, family = "poisson")
summary(ew1)
drop1(ew1, test = "Chi")
newdata.ew1 <- data.frame(age = seq(min(CB.dat$age), max(CB.dat$age), length.out=516))
newdata.ew1$phat <- predict(ew1, newdata.ew1, type = "response")
newdata.ew1 <- cbind(newdata.ew1, predict(ew1, newdata.ew1, type = "response", se.fit = TRUE))
newdata.ew1 <- within(newdata.ew1, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props.ew1 <- tapply(CB.dat$number_emails_paper, factor(CB.dat$age), mean, na.rm=TRUE)
pred.props.ew1 <- data.frame(ages = seq(2, 22, 2), props.ew1 = props.ew1)
paper_email_success<-cbind(CB.dat$number_emails_paper-CB.dat$number_bounced_paper_emails,CB.dat$number_bounced_paper_emails)
ew2 <- glm(paper_email_success ~ age, data = CB.dat, family = "binomial")
summary(ew2)
drop1(ew2, test = "Chi")
exp(cbind(ew2$coef, confint(ew2)))
newdata.ew2 <- data.frame(age = seq(min(CB.dat$age), max(CB.dat$age), length.out=516))
newdata.ew2$phat <- predict(ew2, newdata.ew2, type = "response")
newdata.ew2 <- cbind(newdata.ew2, predict(ew2, newdata.ew2, type = "response", se.fit = TRUE))
newdata.ew2 <- within(newdata.ew2, {
LL <- fit - 2.575*se.fit
UL <- fit + 2.575*se.fit
})
paper_email_year_total<-tapply(CB.dat$number_emails_paper, factor(CB.dat$age), sum,na.rm=T)
paper_email_success_year_total<-tapply(CB.dat$number_emails_paper-CB.dat$number_bounced_paper_emails, factor(CB.dat$age), sum,na.rm=T)
props.ew2 <- paper_email_success_year_total/paper_email_year_total
pred.props.ew2 <- data.frame(ages = seq(2, 22, 2), props.ew2 = props.ew2)
ew3 <- glm(number_emails_web ~ age, data = CB.dat, family = "poisson")
summary(ew3)
drop1(ew3, test = "Chi")
newdata.ew3 <- data.frame(age = seq(min(CB.dat$age), max(CB.dat$age), length.out=516))
newdata.ew3$phat <- predict(ew3, newdata.ew3, type = "response")
newdata.ew3 <- cbind(newdata.ew3, predict(ew3, newdata.ew3, type = "response", se.fit = TRUE))
newdata.ew3 <- within(newdata.ew3, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
props.ew3 <- tapply(CB.dat$number_emails_web, factor(CB.dat$age), mean, na.rm=TRUE)
pred.props.ew3 <- data.frame(ages = seq(2, 22, 2), props.ew3 = props.ew3)
web_email_success<-cbind(CB.dat$number_emails_web-CB.dat$number_bounced_web_emails, CB.dat$number_bounced_web_emails)
ew4 <- glm(web_email_success ~ age, data = CB.dat, family = "binomial")
summary(ew4)
drop1(ew4, test = "Chi")
exp(cbind(ew4$coef, confint(ew4)))
newdata.ew4 <- data.frame(age = seq(min(CB.dat$age), max(CB.dat$age), length.out=516))
newdata.ew4$phat <- predict(ew4, newdata.ew4, type = "response")
newdata.ew4 <- cbind(newdata.ew4, predict(ew4, newdata.ew4, type = "response", se.fit = TRUE))
newdata.ew4 <- within(newdata.ew4, {
LL <- fit - 1.96*se.fit
UL <- fit + 1.96*se.fit
})
web_email_year_total<-tapply(CB.dat$number_emails_web, factor(CB.dat$age), sum, na.rm=T)
web_email_success_year_total<-tapply(CB.dat$number_emails_web-CB.dat$number_bounced_web_emails, factor(CB.dat$age), sum)
props.ew4 <- web_email_success_year_total/web_email_year_total
pred.props.ew4 <- data.frame(ages = seq(2, 22, 2), props.ew4 = props.ew4)
em1 <- ggplot(newdata.ew1, aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) +
labs(x = "age of paper (years)", y = "number of emails in paper", title = "A") +
geom_point(aes(x = ages, y = props.ew1), data = pred.props.ew1, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
em2 <- ggplot(newdata.ew2, aes(x = age, y = fit*100)) +
geom_ribbon(aes(ymin = LL*100, ymax = UL*100), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 100) +
labs(x = "age of paper (years)", y = "% of paper emails that worked", title = "B") +
geom_point(aes(x = ages, y = props.ew2*100), data = pred.props.ew2, size = 2, col = "red") +
theme_bw() + theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
em3 <- ggplot(newdata.ew3, aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = LL, ymax = UL), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 1.25) +
labs(x = "age of paper (years)", y = "number of emails on web", title = "C") +
geom_point(aes(x = ages, y = props.ew3), data = pred.props.ew3, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
em4 <- ggplot(newdata.ew4, aes(x = age, y = fit*100)) +
geom_ribbon(aes(ymin = LL*100, ymax = UL*100), alpha = 0.25) +
geom_line(size = 1) + ylim(0, 100) +
labs(x = "age of paper (years)", y = "% of web emails that worked", title = "D") +
geom_point(aes(x = ages, y = props.ew4*100), data = pred.props.ew4, size = 2, col = "red") +
theme_bw()+ theme(axis.title=element_text(size=8), axis.text = element_text(size = 8))
multiplot(em1, em3, em2, em4, cols = 2)
})
```
text3