Hi Ben,
I study genetic and phenotypic diversity from population to population using ancestry informative markers (AIMs, i.e. SNPs with high allele frequency differentials among populations). Our collaborator developed a new method, myESL, that can identify these AIMs and I am benchmarking the new method against existing ones like FST, Delta and Informativeness.
I am doing this by simulating a simple population that mimics the Yuroba (YRI) population from the 1000 Genomes Project. For simplicity, I am focusing on chromosome 22.
Below is the SLiM script I am using to generate the population and it runs without any issues. Am I on the right track? Once this simple model is successful, I will incorpoate more parameters and make it complex.
Thank you!
initialize() {
defineConstant("N_YRI", 12500); // Effective population size for YRI
defineConstant("MU", 1.2e-8); // Human mutation rate per bp per generation
defineConstant("R", 1e-8); // Recombination rate (approximately 1 cM/Mb)
defineConstant("CHR_LENGTH", 50818468); // Length of chr22 in bp
defineConstant("BURN_IN", 10 * N_YRI); // Burn-in period
defineConstant("SAMPLE_TIME", BURN_IN + 1000); // When to take samples
// Initialize mutation rate
initializeMutationRate(MU);
// Define mutation types
initializeMutationType("m1", 0.5, "f", 0.0);
// m2: deleterious mutations (optional, can be added for realism)
// initializeMutationType("m2", 0.5, "g", -0.01, 0.2);
// Define genomic element type
initializeGenomicElementType("g1", m1, 1.0);
// Define the chromosome
initializeGenomicElement(g1, 0, CHR_LENGTH - 1);
// Set recombination rate
initializeRecombinationRate(R);
}
// Create the YRI population
1 early() {
sim.addSubpop("p1", N_YRI);
}
// Burn-in period: let population reach mutation-drift equilibrium
1:BURN_IN late() {
// Monitor progress every 1000 generations during burn-in
if (sim.cycle % 1000 == 0) {
cat("Burn-in generation " + sim.cycle + " / " + BURN_IN + "\n");
cat(" Segregating mutations: " + size(sim.mutations) + "\n");
cat(" Mean heterozygosity: " + calcHeterozygosity(p1.haplosomes) + "\n\n");
}
}
// After burn-in, run for additional generations
(BURN_IN+1):SAMPLE_TIME late() {
if (sim.cycle % 100 == 0) {
cat("Post-burn-in generation " + sim.cycle + "\n");
}
}
// Sample output at end
SAMPLE_TIME late() {
cat("\n=== SIMULATION COMPLETE ===\n");
cat("Final generation: " + sim.cycle + "\n");
cat("Population size: " + p1.individualCount + "\n");
cat("Segregating mutations: " + size(sim.mutations) + "\n");
cat("Fixed mutations: " + size(sim.substitutions) + "\n");
// Calculate summary statistics
cat("\nSummary Statistics:\n");
cat(" Mean heterozygosity: " + calcHeterozygosity(p1.haplosomes) + "\n");
// Output a sample of individuals (similar to 1000G sample size)
cat("\n=== OUTPUTTING SAMPLE ===\n");
cat("Sampling 108 individuals (216 haplotypes) to match 1000G YRI...\n");
// Sample 108 individuals randomly
sampledIndividuals = sample(p1.individuals, 108);
// Output in VCF format
sampledIndividuals.haplosomes.outputHaplosomesToVCF(filePath="yri_chr22_sample.vcf");
cat("Sample output complete. VCF written to: yri_chr22_sample.vcf\n");
sim.simulationFinished();
}