using DataFrames, GLM
# --- Simulation 1: X and Y drawn independently --------------------------------
# N observations of two independent standard-normal variables.
N = 1000
data = DataFrame(X=randn(N), Y=randn(N))

# Selection equation: P(D = 1 | X) = Phi(beta_0 + beta_1 * X), where Phi is the
# standard normal CDF.
beta_0 = 0
beta_1 = 1
index = (beta_0 + beta_1*data[:X])
probability = cdf(Normal(0,1), index)

# Draw one Bernoulli selection indicator per observation and store it as 0.0/1.0.
D = zeros(N)
for i in 1:N
    D[i] = rand(Bernoulli(probability[i]))
end
data[:D] = D

# Take an independent copy before masking. A plain `data2 = data` only binds a
# second name to the same DataFrame, so the NA assignment below would also
# overwrite Y in `data` and the full sample would be lost.
# deepcopy is used so the column vectors themselves are duplicated even if this
# DataFrames version's `copy` shares columns with the original.
data2 = deepcopy(data)

# Y is recorded as missing (NA) wherever the selection indicator is 0.
data2[data2[:D] .== 0.0, :Y] = NA
data2
# --- Simulation 2: X and Y drawn jointly with covariance 0.5 ------------------
# rand(MvNormal, N) returns one draw per column, so transpose to get one row per
# observation before building the DataFrame.
data3 = DataFrame(rand(MvNormal([0,0.], [1 .5; .5 1]), N)')
names!(data3, [:X, :Y])

# Sample column means and covariance matrix, for comparison with the
# parameters passed to MvNormal above.
mean(convert(Array, data3), 1)
cov(convert(Array, data3))

# Same selection equation as before: P(D = 1 | X) = Phi(beta_0 + beta_1 * X).
index = (beta_0 + beta_1*data3[:X])
probability = cdf(Normal(0,1), index)

# Draw one Bernoulli selection indicator per observation and store it as 0.0/1.0.
D = zeros(N)
for i in 1:N
    D[i] = rand(Bernoulli(probability[i]))
end
data3[:D] = D

# Take an independent copy before masking. A plain `data4 = data3` only binds a
# second name to the same DataFrame, so the NA assignment below would also
# overwrite Y in `data3`.
# deepcopy is used so the column vectors themselves are duplicated even if this
# DataFrames version's `copy` shares columns with the original.
data4 = deepcopy(data3)

# Y is recorded as missing (NA) wherever the selection indicator is 0.
data4[data4[:D] .== 0.0, :Y] = NA