# For users' convenience, we provide a prebuilt dataset
# containing the hg18 chromosome lengths and centromere locations.
hg18
# The same dataset can be obtained as follows:
library(BSgenome)
library(rtracklayer)
# Return the lengths of the first 24 sequences of a human UCSC genome build.
#
# genome: short UCSC build name (e.g. "hg18"); it is expanded to the
#         matching BSgenome package name "BSgenome.Hsapiens.UCSC.<genome>".
#
# Returns a data.frame with one row per sequence and two columns:
#   chrom  - integer index 1..24
#   length - the corresponding entry of seqlengths() for the loaded genome
# NOTE(review): the 1:24 indexing assumes the first 24 sequences are
# chr1..chr22, chrX, chrY in that order - confirm for the builds you use.
getChrLength <- function(genome){
    # Build the full BSgenome package name from the short build name.
    genome <- sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
    g <- getBSgenome(genome, masked=FALSE)
    data.frame(chrom=1:24, length=seqlengths(g)[1:24])
}
# Convert the 'chrom' column of a table from UCSC labels to numbers and
# sort the rows by chromosome.
#
# tbl: a data.frame with a 'chrom' column holding labels such as "chr1",
#      "chrX" or "chrY".
#
# The "chr" text is removed, "X" becomes 23 and "Y" becomes 24, and the
# column is converted with as.numeric(). The table is returned with its
# rows ordered by the resulting numeric 'chrom' values.
.chrAsNum <- function(tbl){
    labels <- gsub("chr", "", tbl$chrom)
    labels[labels == "X"] <- 23
    labels[labels == "Y"] <- 24
    tbl$chrom <- as.numeric(labels)
    rowOrder <- order(tbl$chrom)
    tbl[rowOrder, ]
}
# Fetch the centromere coordinates of a genome build from the UCSC
# "gap" table.
#
# genome: UCSC build name assigned to the browser session (e.g. "hg18").
#
# Returns a data.frame with columns 'chrom', 'centromerStart' and
# 'centromerEnd', passed through .chrAsNum() so that 'chrom' is numeric
# and the rows are ordered by chromosome.
getCentromeres <- function(genome){
    session <- try(browserSession("UCSC"), silent=TRUE)
    # If the default session could not be opened, retry on another mirror.
    if(inherits(session, "try-error")){
        session <- browserSession("UCSC",
                                  url="http://genome-euro.ucsc.edu/cgi-bin/")
    }
    genome(session) <- genome
    gaps <- getTable(ucscTableQuery(session, table="gap"))
    # Keep only the rows typed as centromeres, and the coordinate columns.
    isCentromere <- gaps$type == "centromere"
    centro <- gaps[isCentromere, c("chrom", "chromStart", "chromEnd")]
    colnames(centro)[2:3] <- c("centromerStart", "centromerEnd")
    .chrAsNum(centro)
}
# Assemble the per-chromosome annotation table for a genome build.
#
# genome: build name forwarded to getChrLength() and getCentromeres().
#
# Returns the merge (by 'chrom') of the chromosome-length and centromere
# tables, with an extra 'cumlen' column: 0 for the first row, then the
# running sum of the lengths of all preceding rows.
makeHg <- function(genome){
    lengths <- getChrLength(genome)
    centromeres <- getCentromeres(genome)
    merged <- merge(lengths, centromeres, by="chrom")
    # Shift the cumulative lengths by one row so each row holds the total
    # length of the rows before it.
    runningTotal <- cumsum(as.numeric(merged$length))
    cumlen <- c(0, runningTotal[-nrow(merged)])
    cbind.data.frame(merged, cumlen=cumlen)
}
# Build the hg18 table with makeHg() and display it.
hg18 <- makeHg("hg18")
hg18
# Run the code above in your browser using DataLab