Thanks Luis. I ended up doing a bit of a hack that seems to have worked.
First I made a data frame of the info I needed from the VCF file, including a new SNP column:
# Collect the per-variant fields needed later from the fixed-field matrix of
# the VCF object; row.names = NULL gives the data frame automatic row names.
alleles_info <- data.frame(
  CHROM = data.7@fix[, "CHROM"],
  POS = data.7@fix[, "POS"],
  REF = data.7@fix[, "REF"],
  ALT = data.7@fix[, "ALT"],
  row.names = NULL
)
# Build the SNP label in "POS:REF>ALT" form so it can be added back later.
alleles_info[["SNP"]] <- with(alleles_info, paste0(POS, ":", REF, ">", ALT))
Then I converted it to a genlight object and added all the necessary info, including the SNP data:
# Set the ploidy of the genlight object to 2 so that dartR treats it as binary SNP data
ploidy(gl.1) <- 2
# Add pop info and other individual metrics.
# IMPORTANT: make sure the individuals are in the same order in both the genlight
# object and the strata file before reading it in. Get the individual order from
# the genlight using indNames(gl), write it to a file, and check it against the strata file.
# Export the sample names so the order of the strata file can be checked
#names <- as.data.frame(indNames(gl))
#write.csv(names, paste0(filename, "_samplenames.csv"))
# Assign population (presumably taken from the "pop" individual metric; confirm against the dartR docs)
gl.1 <- gl.reassign.pop(gl.1, as.pop = "pop")
# Check: tabulate the number of individuals per population
table(pop(gl.1))
# Generate empty columns in loc.metrics
gl.1 <- gl.compliance.check(gl.1)
# Calculate loc.metrics
gl.1 <- gl.recalc.metrics(gl.1)
#Add back the allele info
#assign lat/long
###do this things so a fasta can be output
#add the alleles
alleles <- paste0(gl.5$other$loc.metrics$REF, "/", gl.5$other$loc.metrics$ALT)
#add the SNP postiion
position <- as.integer(gl.5$other$loc.metrics$POS)
gl.5@position <- position
gl.5$other$loc.metrics$position <- gl.5$other$loc.metrics$POS
###add fake trimmed sequence
gl.5@other$loc.metrics$TrimmedSequence <- "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
####trash version of gl2fasta code that works with VCF file origin data, with output suitable to IQtree.
# ---- Write a FASTA file from a VCF-derived genlight object -----------------
# Adapted from dartR's gl2fasta. For every individual, each genotype call
# (0 = hom. REF, 1 = het., 2 = hom. ALT, NA = missing) is turned into a single
# nucleotide / IUPAC ambiguity code ("N" for missing) and the per-locus codes
# are concatenated into one sequence per individual.
#
# Inputs (set these, then run the whole section):
#   x        genlight object with @position set and REF / ALT columns in
#            x@other$loc.metrics (single-base alleles: A, T, G or C).
#   outfile  path of the FASTA file to write (relative to the working dir).
#   method   3 = concatenate the per-locus codes only (SNP-only alignment);
#            1 = embed each code in the locus' TrimmedSequence.
x <- gl.5
outfile <- "gl5.fasta"
method <- 3

if (!method %in% c(1, 3)) {
  stop("method must be 1 or 3", call. = FALSE)
}

snpmatrix <- as.matrix(x)

# Lookup table: the two alleles of a genotype -> nucleotide / IUPAC code.
bases <- c("A", "T", "G", "C")
conversion <- matrix(
  c(
    "A", "W", "R", "M",
    "W", "T", "K", "Y",
    "R", "K", "G", "S",
    "M", "Y", "S", "C"
  ),
  nrow = 4, ncol = 4,
  dimnames = list(bases, bases)
)

allelepos <- x@position

# Alleles of every locus in "REF/ALT" form, built here from loc.metrics so the
# section does not rely on a `snp` vector defined somewhere else.
snp <- paste0(x@other$loc.metrics$REF, "/", x@other$loc.metrics$ALT)
allele1 <- gsub("(.)/(.)", "\\1", snp, perl = TRUE)
allele2 <- gsub("(.)/(.)", "\\2", snp, perl = TRUE)
if (length(snp) != nLoc(x)) {
  stop("loc.metrics must hold one REF and one ALT value per locus",
       call. = FALSE)
}

# Hoisted out of the per-individual loop: the code each locus gets for
# genotype 0, 1 and 2 (rows 1-3). An allele that is not A/T/G/C yields NA
# here; that is only reported if some individual actually needs that code.
ref_idx <- match(allele1, bases)
alt_idx <- match(allele2, bases)
locus_codes <- rbind(
  conversion[cbind(ref_idx, ref_idx)],
  conversion[cbind(ref_idx, alt_idx)],
  conversion[cbind(alt_idx, alt_idx)]
)

if (method == 1) {
  # NOTE(review): the method-1 lines were truncated in the original post; this
  # is reconstructed from the surviving fragments following dartR's gl2fasta
  # (base substr() in place of stringr::str_sub()). Confirm before relying on
  # it.
  sequences <- as.character(x@other$loc.metrics$TrimmedSequence)
  seq_left <- substr(sequences, 1, allelepos)
  seq_right <- substr(sequences, allelepos + 2, 500)
  seq_masked <- strrep("N", nchar(sequences))
}

# Build every line in memory first and write the file in one go. Unlike
# sink(), a failure part-way through cannot leave console output redirected.
n_ind <- nInd(x)
headers <- paste0(">", indNames(x), "_", pop(x))
fasta_lines <- character(2 * n_ind)

for (i in seq_len(n_ind)) {
  geno <- snpmatrix[i, ]
  called <- !is.na(geno)
  codes <- rep("N", length(geno))
  codes[called] <- locus_codes[cbind(geno[called] + 1, which(called))]
  if (anyNA(codes)) {
    stop(
      "Cannot encode locus ", which(is.na(codes))[1], " for individual ",
      indNames(x)[i], ": REF/ALT must each be a single A, T, G or C",
      call. = FALSE
    )
  }
  if (method == 1) {
    pieces <- ifelse(called, paste0(seq_left, codes, seq_right), seq_masked)
  } else {
    pieces <- codes
  }
  fasta_lines[2 * i - 1] <- headers[i]
  # The two trailing blanks reproduce what cat(result, " \n") used to write.
  fasta_lines[2 * i] <- paste0(paste(pieces, collapse = ""), "  ")
}

writeLines(fasta_lines, outfile)
There is a whole lot more to this; if anyone wants it, they can email me.