Hi Manuela,
1. The reason of why MAC are not integers using the code I shared is because MAF has only 4 decimals, we would need to use an infinite number of decimals to get integers... Alternatively, we could just count the number of alleles....
2. Yes!
3. Because heterozygosity/homozygosity is calculated only taking in account loci with rare alleles and it is calculated by population, it is reasonable that homozygosity is 1 in those populations in which the rare allele is not present. Heterozygosity/homozygosity is the probability of two alleles drawn at random from the gene pool are heterozygous/homozygous, it does not provide information about which of the two alleles is heterozygous/homozygous.
I think what you want to know is the percentage of individuals that are homozygous for the rare allele by population, for that see code below.
Cheers,
Luis
library(dartRverse)
library(reshape)
t1 <-
platypus.gl# filtering loci with all missing data
t1 <- gl.filter.allna(t1)
# filtering monomorphs
t1 <- gl.filter.monomorphs(t1)
# recalculating metrics
t1 <- gl.recalc.metrics(t1)
# converting MAF to minor allele count (MAC)
MAC <- t1$other$loc.metrics$maf * (nInd(t1) * 2)
t1$other$loc.metrics$MAC <- MAC
# getting loci with less than 5 alleles
t1_rare <- gl.filter.locmetric(t1,
metric = "MAC",
upper = 5,
lower = 0,
keep = "within")
# calculate frequencies of the reference and alternative alelles
allele_freq <- gl.allele.freq(t1_rare,by="loc")
# Get the loci where the rare allele is the reference allele
ref_rare <- locNames(t1_rare)[which(allele_freq$frequency > 0.5 )]
# separating datasets based on whether the rare allele is the reference or the alternative
t1_rare_ref <- gl.keep.loc(t1_rare,loc.list = ref_rare)
t1_rare_alt <- gl.drop.loc(t1_rare,loc.list = ref_rare)
#separating populations
t1_rare_ref_bypop <- seppop(t1_rare_ref)
t1_rare_alt_bypop <- seppop(t1_rare_alt)
#converting to matrix
t1_rare_ref_bypop_mat <- lapply(t1_rare_ref_bypop, as.matrix)
t1_rare_alt_bypop_mat <- lapply(t1_rare_alt_bypop, as.matrix)
#counting the number of individuals that are homozygous for the reference allele
ind_ref <- lapply(t1_rare_ref_bypop_mat, function(y){
unlist(unname(apply(y,2,function(x){
length(which(x==0))
})))
})
#counting the number of individuals that are homozygous for the alternative allele
ind_alt <- lapply(t1_rare_alt_bypop_mat, function(y){
unlist(unname(apply(y,2,function(x){
length(which(x==2))
})))
})
# merging number of individuals that are hmozygous for the alternative and
# the reference allele
ind_all <- mapply("c",ind_ref,ind_alt,SIMPLIFY = F)
# number of individuals per population
nInd_bypop <- as.numeric(table(pop(t1_rare)))
# getting the percentage of individuals that are homozygous
perc_ind <- as.data.frame(mapply("/", ind_all, nInd_bypop))
#mean by pop
perc_ind_mean <- data.frame(hom=colMeans(perc_ind),
pop= colnames(perc_ind))
# plot
ggplot(perc_ind_mean,aes(x = pop,y= hom,fill=pop))+
geom_col() +
labs(y= "Percentage of individuals that \n are homozygous for the rare allele (MAC < 5)")