## ---------------------------------------------------------------------
## A. BASIC USAGE
## ---------------------------------------------------------------------
## listDatasets() from the biomaRt package enumerates the datasets that
## the "ensembl" BioMart database offers; show the first few of them.
## NOTE(review): only biomaRt is attached in this script, yet
## makeTxDbFromBiomart() is called below -- presumably the package that
## provides it is already loaded by the caller; confirm before running
## this file standalone.
library(biomaRt)
head(listDatasets(useMart("ensembl")))

## Retrieve the complete transcript dataset for Worm:
txdb1 <- makeTxDbFromBiomart(dataset = "celegans_gene_ensembl")
txdb1

## Retrieve a partial transcript dataset for Human, limited to the
## transcripts listed here (the same vector is reused in section B):
transcript_ids <- c(
  "ENST00000013894",
  "ENST00000268655",
  "ENST00000313243",
  "ENST00000435657",
  "ENST00000384428",
  "ENST00000478783"
)
txdb2 <- makeTxDbFromBiomart(
  dataset = "hsapiens_gene_ensembl",
  transcript_ids = transcript_ids
)
txdb2  # note that these annotations match the GRCh38 genome assembly
## ---------------------------------------------------------------------
## B. USING A HOST OTHER THAN www.biomart.org
## ---------------------------------------------------------------------
## A typical use case is to access the "ensembl" BioMart database on a
## mirror, e.g. on useast.ensembl.org. A gotcha when doing this is that
## the name of the database on the mirror can be different! We can check
## this with listMarts() from the biomaRt package.
## The mirror is named once so that the listMarts() check and the
## makeTxDbFromBiomart() call below always target the same host.
mirror_host <- "useast.ensembl.org"
listMarts(host = mirror_host)

## Therefore, in addition to setting 'host' to the mirror, we must also
## change the name passed to the 'biomart' argument.
## 'transcript_ids' is the character vector defined in section A.
txdb3 <- makeTxDbFromBiomart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  transcript_ids = transcript_ids,
  host = mirror_host
)
txdb3
## ---------------------------------------------------------------------
## C. USING FILTERS
## ---------------------------------------------------------------------
## listFilters() from the biomaRt package reports the filter names that
## are valid for a given mart/dataset; show the first few of them:
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
head(listFilters(mart))

## Retrieve the transcript dataset for Ensembl gene ENSG00000011198 by
## passing a named list (filter name -> value) as 'filters':
my_filter <- list(ensembl_gene_id = "ENSG00000011198")
txdb4 <- makeTxDbFromBiomart(
  dataset = "hsapiens_gene_ensembl",
  filters = my_filter
)
txdb4

## Inspect what was retrieved for that gene:
transcripts(txdb4, columns = c("tx_id", "tx_name", "gene_id"))
transcriptLengths(txdb4)
## ---------------------------------------------------------------------
## D. RETRIEVING CHROMOSOME INFORMATION ONLY
## ---------------------------------------------------------------------
## Fetch only the chromosome information for the Worm dataset, then
## display the result:
chrominfo <- getChromInfoFromBiomart(
  dataset = "celegans_gene_ensembl"
)
chrominfo
## Run the code above in your browser using DataLab