fl <- system.file("extdata", "structural.vcf", package="VariantAnnotation")
vcf <- readVcf(fl, genome="hg19")
## ----------------------------------------------------------------
## Accessors
## ----------------------------------------------------------------
## Variant locations are stored in the GRanges object returned by
## the rowRanges() accessor.
rowRanges(vcf)
## Individual fields can be extracted with ref(), alt(), qual()
## and filt().
qual(vcf)
ref(vcf)
## All 'info' fields can be extracted together along
## with the GRanges of locations.
head(info(vcf))
## All genotype fields can be seen with geno(). Individual
## fields are accessed with '$' or '[['.
geno(vcf)
identical(geno(vcf)$GQ, geno(vcf)[[2]])
## ----------------------------------------------------------------
## Renaming seqlevels and subsetting
## ----------------------------------------------------------------
## Overlap and matching operations require that the objects
## being compared have the same seqlevels (chromosome names).
## It is often the case that the seqlevesls in on of the objects
## needs to be modified to match the other. In this VCF, the
## seqlevels are numbers instead of preceded by "chr" or "ch".
seqlevels(vcf)
## Rename the seqlevels to start with 'chr'.
vcf2 <- renameSeqlevels(vcf, paste0("chr", seqlevels(vcf)))
seqlevels(vcf2)
## The VCF can also be subset by seqlevel using 'keepSeqlevels'
## or 'dropSeqlevels'. See ?keepSeqlevels for details.
vcf3 <- keepSeqlevels(vcf2, "chr2")
seqlevels(vcf3)
## ----------------------------------------------------------------
## Header information
## ----------------------------------------------------------------
## Header data can be modified in the 'meta', 'info' and 'geno'
## slots of the VCFHeader object. See ?VCFHeader for details.
## Current 'info' fields.
rownames(info(header(vcf)))
## Add a new field to the header.
newInfo <- DataFrame(Number=1, Type="Integer",
Description="Number of Samples With Data",
row.names="NS")
info(header(vcf)) <- rbind(info(header(vcf)), newInfo)
rownames(info(header(vcf)))
## ----------------------------------------------------------------
## Collapsed and Expanded VCF
## ----------------------------------------------------------------
## readVCF() produces a CollapsedVCF object.
fl <- system.file("extdata", "ex2.vcf",
package="VariantAnnotation")
vcf <- readVcf(fl, genome="hg19")
vcf
## The ALT column is a DNAStringSetList to allow for more
## than one alternate allele per variant.
alt(vcf)
## For structural variants ALT is a CharacterList.
fl <- system.file("extdata", "structural.vcf",
package="VariantAnnotation")
vcf <- readVcf(fl, genome="hg19")
alt(vcf)
## ExpandedVCF is the 'flattened' counterpart of CollapsedVCF.
## The ALT and all variables with Number='A' in the header are
## expanded to one row per alternate allele.
vcfLong <- expand(vcf)
alt(vcfLong)
## Also see the ?VRanges class for an alternative form of
## 'flattened' VCF data.
## ----------------------------------------------------------------
## isSNV()
## ----------------------------------------------------------------
## isSNV() returns a subset VCF containing SNVs only.
vcf <- VCF(rowRanges = GRanges("chr1", IRanges(1:4*3, width=c(1, 2, 1, 1))))
alt(vcf) <- DNAStringSetList("A", c("TT"), c("G", "A"), c("TT", "C"))
ref(vcf) <- DNAStringSet(c("G", c("AA"), "T", "G"))
fixed(vcf)[c("REF", "ALT")]
## SNVs are present in rows 1 (single ALT value), 3 (both ALT values)
## and 4 (1 of the 2 ALT values).
vcf[isSNV(vcf, singleAltOnly=TRUE)]
vcf[isSNV(vcf, singleAltOnly=FALSE)] ## all 3 SNVs
Run the code above in your browser using DataLab