## ----------------------------------------------------------------
  ## Non-probability based snp encoding using "GT"
  ## ----------------------------------------------------------------
  fl <- system.file("extdata", "ex2.vcf", package="VariantAnnotation") 
  vcf <- readVcf(fl, "hg19")
  ## This file has no "GL" or "GP" field so we use "GT".
  geno(vcf)
  ## Convert the "GT" FORMAT field to a SnpMatrix.
  mat <- genotypeToSnpMatrix(vcf)
  ## The result is a list of length 2.
  names(mat)
  ## Compare coding in the VCF file to the SnpMatrix.
  geno(vcf)$GT
  t(as(mat$genotype, "character"))
  ## The 'ignore' column in 'map' indicates which variants 
  ## were set to NA. Variant rs6040355 was ignored because 
  ## it has multiple alternate alleles, microsat1 is not a 
  ## snp, and chr20:1230237 has no alternate allele.
  mat$map
  ## ----------------------------------------------------------------
  ## Probability-based encoding using "GL", "PL" or "GP"
  ## ----------------------------------------------------------------
  ## Read a vcf file with a "GL" field.
  fl <- system.file("extdata", "gl_chr1.vcf", package="VariantAnnotation") 
  vcf <- readVcf(fl, "hg19")
  geno(vcf)
  ## Convert the "GL" FORMAT field to a SnpMatrix
  mat <- genotypeToSnpMatrix(vcf, uncertain=TRUE)
  ## Only 3 of the 9 variants passed the filters.  The
  ## other 6 variants had no alternate alleles.
  mat$map
  ## Compare genotype representations for a subset of
  ## samples in variant rs180734498.
  ## Original called genotype
  geno(vcf)$GT["rs180734498", 14:16]
  ## Original genotype likelihoods
  geno(vcf)$GL["rs180734498", 14:16]
  ## Posterior probability (computed inside genotypeToSnpMatrix)
  GLtoGP(geno(vcf)$GL["rs180734498", 14:16, drop=FALSE])[1,]
  ## SnpMatrix coding.
  t(as(mat$genotype, "character"))["rs180734498", 14:16]
  t(as(mat$genotype, "numeric"))["rs180734498", 14:16]
  ## For samples NA11829 and NA11830, one probability is significantly
  ## higher than the others, so SnpMatrix calls the genotype.  These
  ## calls match the original coding: "0|1" -> "A/B", "0|0" -> "A/A".
  ## Sample NA11831 was originally called as "0|1" but the probability
  ## of "0|0" is only a factor of 3 lower, so SnpMatrix calls it as
  ## "Uncertain" with an appropriate byte-level encoding.
Run the code above in your browser using DataLab