# For users' convenience, we provide a prebuilt dataset
# containing the hg38 chromosome lengths and centromere locations.
# Evaluating the object name at top level prints the dataset.
hg38
# The same dataset can be obtained as follows:
## Not run:
# library(BSgenome)
# library(rtracklayer)
#
# getChrLength <- function(genome){
# genome <- sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
# g <- getBSgenome(genome, masked=FALSE)
# data.frame(chrom=1:24, length=seqlengths(g)[1:24])
# }
# .chrAsNum <- function(tbl){
# tbl$chrom <- gsub("chr", "", tbl$chrom)
# tbl$chrom[tbl$chrom=="X"] <- 23
# tbl$chrom[tbl$chrom=="Y"] <- 24
# tbl$chrom <- as.numeric(tbl$chrom)
# tbl[order(tbl$chrom),]
# }
# getCentromeres <- function(genome){
# mySession <- try(browserSession("UCSC"), silent=TRUE)
# # In case it fails, use another mirror
# if(inherits(mySession, "try-error"))
# mySession <- browserSession("UCSC",
# url="http://genome-euro.ucsc.edu/cgi-bin/")
# genome(mySession) <- genome
# obj <- ucscTableQuery(mySession, table="gap")
# tbl <- getTable(obj)
# if(!"centromere" %in% tbl$type)
# return(NULL)
# tbl <- tbl[tbl$type=="centromere", c("chrom", "chromStart", "chromEnd")]
# colnames(tbl)[2:3] <- c("centromerStart", "centromerEnd")
# .chrAsNum(tbl)
# }
# makeHg <- function(genome){
# chrL <- getChrLength(genome)
# ctm <- getCentromeres(genome)
# # Note: if no centromeres are returned for the requested genome
# # (as is the case for hg38), the hg19 centromere locations are used instead.
# if(is.null(ctm))
# ctm <- getCentromeres("hg19")
# tbl <- merge(chrL, ctm, by="chrom")
# cumlen <- c(0, cumsum(as.numeric(tbl$length))[-nrow(tbl)])
# cbind.data.frame(tbl, cumlen=cumlen)
# }
# hg38 <- makeHg("hg38")
# hg38
# ## End(Not run)
# Run the code above in your browser using DataLab