library(BSgenome.Hsapiens.UCSC.hg19)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
## ----------------------------
## VCF object as query
## ----------------------------
## Read variants from a VCF file
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
vcf <- readVcf(fl, "hg19")
## Rename seqlevels in the VCF object to match those in the TxDb.
vcf <- renameSeqlevels(vcf, "chr22")
## Confirm common seqlevels
intersect(seqlevels(vcf), seqlevels(txdb))
## When 'query' is a VCF object the varAllele argument is missing.
coding1 <- predictCoding(vcf, txdb, Hsapiens)
head(coding1, 3)
## Exon-centric or cDNA locations:
exonsbytx <- exonsBy(txdb, "tx")
cDNA <- mapCoords(coding1[!duplicated(ranges(coding1))], exonsbytx)
coding1$cDNA <- ranges(cDNA)[togroup(coding1$QUERYID)]
## ----------------------------
## GRanges object as query
## ----------------------------
## Alternatively, a GRanges can be the 'query' to predictCoding().
## The seqlevels were previously adjusted in the VCF object so the GRanges
## extracted from rowData() has the correct seqlevels.
rd <- rowData(vcf)
## The GRanges must be expanded to have one row per alternate allele.
## Variants 1, 2 and 10 have two alternate alleles.
altallele <- alt(vcf)
eltlen <- elementLengths(altallele)
rd_exp <- rep(rd, eltlen)
## Call predictCoding() with the expanded GRanges as the 'query'
## and the unlisted alternate allele as the 'varAllele'.
coding2 <- predictCoding(rd_exp, txdb, Hsapiens, unlist(altallele))
identical(coding1, coding2)
Run the code above in your browser using DataLab