####### Setup #######
# Numpy is the numerical library dadi is built upon
import numpy as np
import dadi
import dadi.Godambe
####### Import data and generate fs #######
dd = dadi.Misc.make_data_dict("dadi_input_killarney_GEO_REC")
pop_ids, ns = ['GEO'], [400]
fs = dadi.Spectrum.from_data_dict(dd, pop_ids, ns, polarized = False) # fs folded because the ancestral state of SNPs is unknown
# We can save our extracted spectrum to disk
fs.mask[0:20] = True # mask the first 20 alleles because low confidence in frequency calling
fs.to_file('GEO.fs')
##### Likelihood ratio test #####
# Redefine the two custom demography models
def bottlegrowth_1d(params, ns, pts):
"""
Instantanous size change followed by exponential growth.
Tb is a fixed parameter and theta is inferred with the other parameters using multinom = False during optimization
params = (nuB,nuF,T)
ns = (n1,)
nuB: Ratio of population size after instantanous change to ancient
population size
nuF: Ratio of contemporary to ancient population size
Tb: Time in the past at which instantaneous change happened and growth began
(in units of 2*Na generations) = is a fixed parameter
n1: Number of samples in resulting Spectrum
pts: Number of grid points to use in integration.
"""
nuB,nuF,theta = params
xx = dadi.Numerics.default_grid(pts)
phi = dadi.PhiManip.phi_1D(xx, theta0=theta)
Tb = (48*2*2.64e-09*617993816)/theta
nu_func = lambda t: nuB*np.exp(np.log(nuF/nuB) * t/Tb)
phi = dadi.Integration.one_pop(phi, xx, T = Tb, nu = nu_func, theta0 = theta)
fs = dadi.Spectrum.from_phi(phi, ns, (xx,))
return fs
def two_epoch(params, ns, pts):
"""
Instantaneous size change some time ago.
Tb is a fixed parameter and theta is inferred with the other parameters using multinom = False during optimization
params = (nu,theta)
ns = (n1,)
nu: Ratio of contemporary to ancient population size
T: Time in the past at which size change happened (in units of 2*Na
generations)
n1: Number of samples in resulting Spectrum
pts: Number of grid points to use in integration.
"""
nu,theta = params
xx = dadi.Numerics.default_grid(pts)
phi = dadi.PhiManip.phi_1D(xx, theta0=theta)
Tb = (48*2.64e-09*617993816)/theta
phi = dadi.Integration.one_pop(phi, xx, T = Tb, nu = nu, theta0=theta)
fs = dadi.Spectrum.from_phi(phi, ns, (xx,))
return fs
# These are the grid point settings we will use for extrapolation.
pts_l = [410,420,430] # good results are obtained by setting the smallest grid size slightly larger than the largest population sample size
# Select the more complex demographic function
func = bottlegrowth_1d
# Make the extrapolating version of our demographic model function.
func_ex = dadi.Numerics.make_extrap_log_func(func)
# These are the best-fit parameters (nuB, nuF, Theta), which we found by multiple optimizations
popt = np.array([8.44E-06, 29.941, 1.31E+06])
# Calculate the best-fit model AFS.
model = func_ex(popt, ns, pts_l)
# Likelihood of the data given the model AFS.
ll_model = dadi.Inference.ll(model, fs)
# Load the data
data = fs
ns = data.sample_sizes
all_boot = [dadi.Spectrum.from_file('/home/msalomon/temporary/dadi_killarney_recovery/killarney_1D_GEO_fixedtime/bootstrapping/minGEO.subset.boot_{0}.fs'.format(ii))
for ii in range(100)]
### Let's do a likelihood-ratio test comparing models with and without recovery
# The no recovery model is implemented as two_epoch
func_two_epoch = two_epoch
func_ex_two_epoch = dadi.Numerics.make_extrap_log_func(func_two_epoch)
# These are the best-fit parameters (nu, Theta), which we found by multiple optimizations
popt_two_epoch = np.array([0.00013,1.32E+06])
model_two_epoch = func_ex_two_epoch(popt_two_epoch, ns, pts_l)
ll_two_epoch = dadi.Inference.ll(model_two_epoch, data)
# Since LRT evaluates the complex model using the best-fit parameters from the
# simple model, we need to create list of parameters for the complex model (nuB, nuF, Theta)
# using the simple (no recovery) best-fit params. Since evalution is done with more
# complex model, need to insert zero values at corresponding
# parameters index in complex model. And we need to tell the LRT adjust function
# that the 1st parameter (counting from 0) is the nested one.
p_lrt = [8.44E-06, 0, 1.31E+06]
adj = dadi.Godambe.LRT_adjust(func_ex, pts_l, all_boot, p_lrt, data,
nested_indices=[1], multinom=False)
D_adj = adj*2*(ll_model - ll_two_epoch)
print('Adjusted D statistic: {0:.4f}'.format(D_adj))
# Because this is test of a parameter on the boundary of parameter space
# (nuF cannot be less than zero), our null distribution is an even proportion
# of chi^2 distributions with 0 and 1 d.o.f. To evaluate the p-value, we use the
# point percent function for a weighted sum of chi^2 dists.
pval = dadi.Godambe.sum_chi2_ppf(D_adj, weights=(0.5,0.5))
print('p-value for rejecting no-migration model: {0:.4f}'.format(pval))