# Build a small example data set: four individuals scored at three loci.
samples <- paste0("ind", 1:4)
samples
loci <- paste0("loc", 1:3)
loci
# Genotypes live in a list-matrix (samples x loci); each cell holds a vector
# of alleles. Every cell starts out as -9, the missing-genotype placeholder.
testgen <- array(
  list(-9),
  dim = c(length(samples), length(loci)),
  dimnames = list(samples, loci)
)
# Fill in one locus at a time, in the order ind1, ind2, ind3, ind4.
testgen[, "loc1"] <- list(
  -9,
  c(102, 104),
  c(100, 106, 108, 110, 114),
  c(102, 104, 106, 110, 112)
)
testgen[, "loc2"] <- list(
  c(77, 79, 83),
  c(79, 85),
  -9,
  c(83, 85, 87, 91)
)
testgen[, "loc3"] <- list(
  c(122, 128),
  c(124, 126, 128, 132),
  c(120, 126),
  c(124, 128, 130)
)
# report every sample/locus combination whose genotype is missing
find.missing.gen(testgen)
# compute the pairwise distances; with all.distances = TRUE the result holds
# the three-dimensional per-locus array first and the mean matrix second
gendist <- meandistance.matrix(
  testgen,
  distmetric = Bruvo.distance,
  maxl = 4,
  all.distances = TRUE
)
# inspect the loc1 slice, which has both missing data and long genotypes
gendist[[1]]["loc1", , ]
# list every missing distance in the array
find.na.dist(gendist[[1]])
# list only the missing distances that are not explained by missing genotypes
find.na.dist.not.missing(testgen, gendist[[1]])
# Work on a copy of the per-locus distance array so the original is untouched
newDistArray <- gendist[[1]]
# The ind3/ind4 distance at loc1 was NA because both genotypes are longer than
# maxl. Compute it directly from the stored genotypes (if this were too
# computationally intensive, you might estimate it manually instead).
subDist <- Bruvo.distance(
  testgen[["ind3", "loc1"]],
  testgen[["ind4", "loc1"]]
)
subDist
# The array holds both orderings of the pair, so fill in both cells
newDistArray["loc1", "ind4", "ind3"] <- subDist
newDistArray["loc1", "ind3", "ind4"] <- subDist
# Recompute the mean distance matrix from the patched array
newMeanMatrix <- meandist.from.array(newDistArray)
# Compare the patched mean matrix with the original one
newMeanMatrix
gendist[[2]]
# Run the code above in your browser using DataLab