# Simplest usage: retrieve the names of all genomes stored within the NCBI
# databases and show the first five entries.
head(listGenomes(), n = 5)

# Same listing, but with all details included.
head(listGenomes(details = TRUE), n = 5)

# All details, restricted to the kingdom "Bacteria".
head(listGenomes(details = TRUE, kingdom = "Bacteria"), n = 5)
# Number of genomes available for each kingdom. The detailed table is
# retrieved once and reused for all of the tabulations below, instead of
# calling listGenomes(details = TRUE) a second time for the same data.
ncbi_genomes <- listGenomes(details = TRUE)
table(ncbi_genomes[, "kingdoms"])

# Analogously, the number of genomes available for each group ...
table(ncbi_genomes[, "group"])

# ... and for each subgroup.
table(ncbi_genomes[, "subgroup"])
# The search can also be limited to the refseq database:
# names only (first 20 entries) ...
head(listGenomes(database = "refseq"), n = 20)

# ... with all details ...
head(listGenomes(database = "refseq", details = TRUE), n = 5)

# ... and with all details for the kingdom "Eukaryota" only.
head(
  listGenomes(database = "refseq", kingdom = "Eukaryota", details = TRUE),
  n = 5
)
# Sort the detailed genome table by file size in descending order and show
# the first five rows. Relies on `ncbi_genomes` created earlier in this script.
library(dplyr)
ncbi_genomes %>%
  arrange(desc(file_size_MB)) %>%
  head(n = 5)
# The organism table can also be updated by passing the 'update' argument.
head(listGenomes(update = TRUE, details = TRUE), n = 5)
# Run the code above in your browser using DataLab