library(EnsDb.Hsapiens.v75)
edb <- EnsDb.Hsapiens.v75

###### genes
##
## Fetch every gene encoded on chromosome Y.
AllY <- genes(edb, filter = SeqnameFilter("Y"))
AllY

## Same query, but with the result requested as a DataFrame. Note that,
## despite its name, AllY.granges is fetched with return.type "DataFrame".
AllY.granges <- genes(
  edb,
  filter = SeqnameFilter("Y"),
  return.type = "DataFrame"
)
AllY.granges

## Additionally pull in all transcripts of each gene along with their
## chromosomal coordinates, ordered by transcript start; the result is
## returned as GRanges.
AllY.granges.tx <- genes(
  edb,
  filter = SeqnameFilter("Y"),
  columns = c(
    "gene_id", "seq_name", "seq_strand",
    "tx_id", "tx_biotype", "tx_seq_start", "tx_seq_end"
  ),
  order.by = "tx_seq_start"
)
AllY.granges.tx
###### transcripts
##
## All transcripts of a single gene, ordered by transcript start.
Tx <- transcripts(
  edb,
  filter = GeneidFilter("ENSG00000184895"),
  order.by = "tx_seq_start"
)
Tx

## All transcripts of two genes, together with some gene- and
## transcript-level annotation columns.
Tx <- transcripts(
  edb,
  filter = GeneidFilter(c("ENSG00000184895", "ENSG00000092377")),
  columns = c(
    "gene_id", "gene_seq_start", "gene_seq_end",
    "gene_biotype", "tx_biotype"
  )
)
Tx
###### promoters
##
## Bona-fide promoters (2k up- to 200nt downstream of the TSS) for the
## transcripts of the two genes.
promoters(
  edb,
  filter = GeneidFilter(c("ENSG00000184895", "ENSG00000092377"))
)
###### exons
##
## All exons of the given genes, ordered by exon start, with gene-level
## annotation columns attached.
Exon <- exons(
  edb,
  filter = GeneidFilter(c("ENSG00000184895", "ENSG00000092377")),
  order.by = "exon_seq_start",
  columns = c("gene_id", "gene_seq_start", "gene_seq_end", "gene_biotype")
)
Exon
##### exonsBy
##
## Exons grouped by transcript, for transcripts encoded on chromosomes
## X and Y.
ETx <- exonsBy(
  edb,
  by = "tx",
  filter = SeqnameFilter(c("X", "Y"))
)
ETx

## Exons grouped by gene, for genes encoded on chromosomes X and Y,
## including additional annotation columns in the result.
EGenes <- exonsBy(
  edb,
  by = "gene",
  filter = SeqnameFilter(c("X", "Y")),
  columns = c("gene_biotype", "gene_name")
)
EGenes

## Note that the result might also contain "LRG" genes; count the group
## names matching "LRG".
sum(grepl("LRG", names(EGenes)))
## To fetch just Ensembl genes, use a GeneidFilter with value "ENS%" and
## condition "like".
##### transcriptsBy
##
## Transcripts grouped by gene for chromosomes X and Y.
TGenes <- transcriptsBy(
  edb,
  by = "gene",
  filter = SeqnameFilter(c("X", "Y"))
)
TGenes

## Convert the result to a SAF formatted data.frame that can be used by
## the featureCounts function from the Rsubread package.
head(toSAF(TGenes))
##### transcriptsByOverlaps
##
## Three query regions on chromosome Y.
ir <- IRanges(
  start = c(2654890, 2709520, 28111770),
  end = c(2654900, 2709550, 28111790)
)
gr <- GRanges(seqnames = rep("Y", length(ir)), ranges = ir)

## Retrieve all transcripts overlapping any of the regions.
txs <- transcriptsByOverlaps(edb, gr)
txs

## Alternatively, express the same query with a GRangesFilter.
grf <- GRangesFilter(gr, condition = "overlapping")
txs <- transcripts(edb, filter = grf)
txs
#### cdsBy
## Coding regions of all transcripts on chromosome Y, requesting extra
## annotation columns in addition to the default exon_id and exon_rank.
cds <- cdsBy(
  edb,
  by = "tx",
  filter = SeqnameFilter("Y"),
  columns = c("tx_biotype", "gene_name")
)

#### the 5' untranslated regions:
fUTRs <- fiveUTRsByTranscript(edb, filter = SeqnameFilter("Y"))

#### the 3' untranslated regions with additional column gene_name.
tUTRs <- threeUTRsByTranscript(
  edb,
  filter = SeqnameFilter("Y"),
  columns = "gene_name"
)
## Run the code above in your browser using DataLab