Last chance! 50% off unlimited learning
Sale ends in
This approach proved more efficient than phylogenetic approaches for
reconstructing transmission trees in densely sampled disease outbreaks
[1]. This implementation defines a generic function seqTrack
with methods for specific object classes.
seqTrack(...)## S3 method for class 'matrix':
seqTrack(x, x.names, x.dates, best = c("min", "max"),
prox.mat = NULL, mu = NULL, haplo.length = NULL, ...)
plotSeqTrack(x, xy, use.arrows=TRUE, annot=TRUE, labels=NULL, col=NULL,
bg="grey", add=FALSE, quiet=FALSE,
date.range=NULL, jitter.arrows=0, plot=TRUE, ...)
get.likelihood(...)
## S3 method for class 'seqTrack':
get.likelihood(x, mu, haplo.length, \ldots)
seqTrack
object.details
or ?as.POSIXct
for more information.prox.mat
. prox.mat[i,j]
haplo.length
to be provided).mu
to be provided).jitter
.seqTrack
,
in which each row is an inferred ancestry described by the following columns:
- id: indices identifying haplotypes/genotypes
- ances: index of the inferred ancestor
- weight: weight of the inferred ancestries
- date: date of the haplotype/genotype
- ances.date: date of the ancestor=== output of plotSeqTrack === This graphical function invisibly returns the coordinates of the arrows/segments drawn and their colors, as a data.frame.
x
a matrix of
genetic distances. The most straightforward distance is the number of
differing nucleotides. See dist.dna
in the ape
package for a wide range of genetic distances between aligned
sequences. The argument best
should be set to "min" (its
default value), so that the identified genealogy minimizes the total
number of mutations. If x
contains number of mutations, then
mu
and haplo.length
should also be provided for
resolving ties in equally parsimonious ancestors using maximum
likelihood. === Likelihood of observed genetic differentiation ===
The probability of oberving a given number of mutations between a
sequence and its ancestor can be computed using
get.likelihood.seqTrack
. Note that this is only possible
if x
contained number of mutations.
=== Converting seqTrack objects to graphs ===
seqTrack objects can be converted to graphNEL-class
objects,
which can in turn be plotted and manipulated using classical graph
tools. Simply use 'as(x, "graphNEL")' where 'x' is a seqTrack
object. This functionality requires the graph
package. Note
that this is to be installed from Bioconductor, likely using the following
command lines:
source("http://bioconductor.org/biocLite.R")
biocLite("graph")
Also note that the R package Rgraphviz (also on Bioconductor) provides nice ways of plotting graphs (replace 'graph' with 'Rgraphviz' in the previous command lines to install this package).
dist.dna
in the ape package to compute pairwise genetic distances in aligned sequences.if(require(ape)){
## ANALYSIS OF SIMULATED DATA ##
## SIMULATE A GENEALOGY
dat <- haploGen(seq.l=1e4, repro=function(){sample(1:4,1)}, gen.time=1, t.max=3)
## SEQTRACK ANALYSIS
res <- seqTrack(dat, mu=0.0001, haplo.length=1e4)
## PROPORTION OF CORRECT RECONSTRUCTION
mean(dat$ances==res$ances,na.rm=TRUE)
## PLOT RESULTS
if(require(graph) && require(Rgraphviz)){
dat.g <- as(dat, "graphNEL")
res.g <- as(res, "graphNEL")
## ORIGINAL DATA
dat.annot <- as.character(unlist(edgeWeights(dat.g)))
names(dat.annot) <- edgeNames(dat.g)
renderGraph(layoutGraph(dat.g, edgeAttrs = list(label = dat.annot)))
## SEQTRACK RESULTS
res.annot <- as.character(unlist(edgeWeights(res.g)))
names(res.annot) <- edgeNames(res.g)
renderGraph(layoutGraph(res.g, edgeAttrs = list(label = res.annot)))
}
## ANALYSIS OF PANDEMIC A/H1N1 INFLUENZA DATA ##
dat <- read.csv(system.file("files/pdH1N1-data.csv",package="adegenet"))
ha <- read.dna(system.file("files/pdH1N1-HA.fasta",package="adegenet"), format="fa")
na <- read.dna(system.file("files/pdH1N1-NA.fasta",package="adegenet"), format="fa")
## COMPUTE NUCLEOTIDIC DISTANCES
nbNucl <- ncol(as.matrix(ha)) + ncol(as.matrix(na))
D <- dist.dna(ha,model="raw")*ncol(as.matrix(ha)) + dist.dna(na,model="raw")*ncol(as.matrix(na))
D <- round(as.matrix(D))
## MATRIX OF SPATIAL CONNECTIVITY
## (to promote local transmissions)
xy <- cbind(dat$lon, dat$lat)
temp <- as.matrix(dist(xy))
M <- 1* (temp < 1e-10)
## SEQTRACK ANALYSIS
dat$date <- as.POSIXct(dat$date)
res <- seqTrack(D, rownames(dat), dat$date, prox.mat=M, mu=.00502/365, haplo.le=nbNucl)
## COMPUTE GENETIC LIKELIHOOD
p <- get.likelihood(res, mu=.00502/365, haplo.length=nbNucl)
# (these could be shown as colors when plotting results)
# (but mutations will be used instead)
## EXAMINE RESULTS
head(res)
tail(res)
range(res$weight, na.rm=TRUE)
barplot(table(res$weight)/sum(!is.na(res$weight)), ylab="Frequency",xlab="Mutations between inferred ancestor and descendent", col="orange")
## DISPLAY SPATIO-TEMPORAL DYNAMICS
if(require(maps)){
myDates <- as.integer(difftime(dat$date, as.POSIXct("2009-01-21"), unit="day"))
myMonth <- as.POSIXct(c("2009-02-01", "2009-03-01","2009-04-01","2009-05-01","2009-06-01","2009-07-01"))
x.month <- as.integer(difftime(myMonth, as.POSIXct("2009-01-21"), unit="day"))
## FIRST STAGE:
## SPREAD TO THE USA AND CANADA
curRange <- as.POSIXct(c("2009-03-29","2009-04-25"))
par(bg="deepskyblue")
map("world", fill=TRUE, col="grey")
opal <- palette()
palette(rev(heat.colors(10)))
plotSeqTrack(res, round(xy), add=TRUE,annot=FALSE,lwd=2, date.range=curRange, col=res$weight+1)
title(paste(curRange, collapse="to "))
legend("bottom", lty=1, leg=0:8, title="number of mutations", col=1:9, lwd=2, horiz=TRUE)
## SECOND STAGE:
## SPREAD WITHIN AMERICA, FIRST SEEDING OUTSIDE AMERICA
curRange <- as.POSIXct(c("2009-04-30","2009-05-07"))
par(bg="deepskyblue")
map("world", fill=TRUE, col="grey")
opal <- palette()
palette(rev(heat.colors(10)))
plotSeqTrack(res, round(xy), add=TRUE,annot=FALSE,lwd=2, date.range=curRange, col=res$weight+1)
title(paste(curRange, collapse="to "))
legend("bottom", lty=1, leg=0:8, title="number of mutations", col=1:9,lwd=2, horiz=TRUE)
## THIRD STAGE:
## PANDEMIC
curRange <- as.POSIXct(c("2009-05-15","2009-05-25"))
par(bg="deepskyblue")
map("world", fill=TRUE, col="grey")
opal <- palette()
palette(rev(heat.colors(10)))
plotSeqTrack(res, round(xy), add=TRUE,annot=FALSE,lwd=2, date.range=curRange, col=res$weight+1)
title(paste(curRange, collapse="to "))
legend("bottom", lty=1, leg=0:8, title="number of mutations", col=1:9,lwd=2, horiz=TRUE)
}
}
Run the code above in your browser using DataLab