I think I did something similar at some point. Here you have some example code for recipe 7.3. It's not double-tested, but perhaps it's a useful starting point for you.
In the SLiM code, I would suggest keeping track of the mutation rates (as suggested by Ben) while defining the genomic regions and encoding them in the metadata of the tree sequence.
```slim
initialize() {
initializeTreeSeq();
defineConstant("MU", 1e-7);
initializeMutationType("m1", 0.5, "f", 0.0); // neutral
initializeMutationType("m2", 0.1, "g", -0.03, 0.2); // deleterious
initializeMutationType("m3", 0.8, "e", 0.1); // beneficial
//initializeGenomicElementType("g1", c(m1,m2,m3), c(2,8,0.1)); // exon
initializeGenomicElementType("g1", c(m2,m3), c(8,0.1)); // exon
defineConstant("MU_G1", MU * sum(c(8,0.1)) / sum(c(2,8,0.1)));
//initializeGenomicElementType("g2", c(m1,m2), c(9,1)); // intron
initializeGenomicElementType("g2", c(m2), 1); // intron
defineConstant("MU_G2", MU * 9 / sum(c(9, 1)));
initializeGenomicElementType("g3", c(m1), 1); // non-coding
defineConstant("MU_G3", 0.0);
// Generate random genes along an approximately 100000-base chromosome
base = 0;
ends = c();
mutRates = c();
while (base < 100000) {
// make a non-coding region
nc_length = rdunif(1, 100, 5000);
pos = base + nc_length - 1;
ends = c(ends, pos);
mutRates = c(mutRates, MU_G3);
initializeGenomicElement(g3, base, pos);
base = base + nc_length;
// make first exon
ex_length = asInteger(rlnorm(1, log(50), log(2))) + 1;
pos = base + ex_length - 1;
ends = c(ends, pos);
mutRates = c(mutRates, MU_G1);
initializeGenomicElement(g1, base, pos);
base = base + ex_length;
// make additional intron-exon pairs
do
{
in_length = asInteger(rlnorm(1, log(100), log(1.5))) + 10;
pos = base + in_length - 1;
ends = c(ends, pos);
mutRates = c(mutRates, MU_G2);
initializeGenomicElement(g2, base, pos);
base = base + in_length;
ex_length = asInteger(rlnorm(1, log(50), log(2))) + 1;
pos = base + ex_length - 1;
ends = c(ends, pos);
mutRates = c(mutRates, MU_G1);
initializeGenomicElement(g1, base, pos);
base = base + ex_length;
}
while (runif(1) < 0.8); // 20% probability of stopping
}
// final non-coding region
nc_length = rdunif(1, 100, 5000);
pos = base + nc_length - 1;
ends = c(ends, pos);
mutRates = c(mutRates, MU_G3);
initializeGenomicElement(g3, base, pos);
// single recombination rate
initializeRecombinationRate(1e-8);
// Landscape of mutation rate
defineConstant("POSITIONS", c(0, ends));
defineConstant("RATES", mutRates);
initializeMutationRate(mutRates, ends);
}
1 early() { sim.addSubpop("p1", 5000); }
10000 early() {
sim.treeSeqOutput(
'out.trees',
metadata=Dictionary(
"POSITIONS", POSITIONS,
"RATES", RATES
)
);
}
```
Then, in the python script, you do as usual but with a custom mutation rate map:
```python
import msprime, tskit, pyslim
import numpy as np
orig_ts = tskit.load("out.trees")
ts = pyslim.recapitate(orig_ts,
recombination_rate=1e-8,
ancestral_Ne=5000, random_seed=5
)
counts = [0, 0, 0]
for m in ts.mutations():
for md in m.metadata["mutation_list"]:
counts[md["mutation_type"]-1] += 1
print("Before overlaying mutations:")
print(
f"Mutations: neutral={counts[0]}, deleterious={counts[1]}, beneficial={counts[2]}"
)
metadata = ts.metadata["SLiM"]["user_metadata"]
positions = np.array(metadata["POSITIONS"])
positions[-1] += 1
# Here, we want the 'opposite', so we add mutations to regions
# where we did not add mutations before
rates_slim = np.array(metadata["RATES"])
baseline = 1e-7
rates_msprime = baseline - rates_slim
mut_map = msprime.RateMap(position=positions, rate=rates_msprime)
# Overlay neutral mutations
next_id = pyslim.next_slim_mutation_id(ts)
ts = msprime.sim_mutations(
ts,
rate=mut_map,
model=msprime.SLiMMutationModel(type=1, next_id=next_id),
keep=True,
)
# Print the number of mutations of each type
counts = [0, 0, 0]
for m in ts.mutations():
for md in m.metadata["mutation_list"]:
counts[md["mutation_type"]-1] += 1
print("After overlaying mutations:")
print(
f"Mutations: neutral={counts[0]}, deleterious={counts[1]}, beneficial={counts[2]}"
)
# Locate selected sites
sel_coeffs = np.array([
sum(md["selection_coeff"] for md in m.metadata["mutation_list"])
for m in ts.mutations()
])
which_max = np.argmax(sel_coeffs)
m = ts.mutation(which_max)
print(ts.site(m.site))
```
Best,