## Not run: ------------------------------------
# # Connection basics ---------------------------------------------------------
# # To connect to a database first create a src:
# my_db <- src_sqlite(path = tempfile(), create = TRUE)
# # Then reference a tbl within that src
# my_tbl <- tbl(my_db, "my_table")
## ---------------------------------------------
# Here we'll use the Lahman database: to create your own local copy,
# run lahman_sqlite()
## Not run: ------------------------------------
# if (requireNamespace("RSQLite") && has_lahman("sqlite")) {
# lahman_s <- lahman_sqlite()
# # Methods -------------------------------------------------------------------
# batting <- tbl(lahman_s, "Batting")
# dim(batting)
# colnames(batting)
# head(batting)
#
# # Data manipulation verbs ---------------------------------------------------
# filter(batting, yearID > 2005, G > 130)
# select(batting, playerID:lgID)
# arrange(batting, playerID, desc(yearID))
# summarise(batting, G = mean(G), n = n())
# mutate(batting, rbi2 = 1.0 * R / AB)
#
# # note that all operations are lazy: they don't do anything until you
# # request the data, either by `print()`ing it (which shows the first ten
# # rows), by looking at the `head()`, or `collect()` the results locally.
#
# system.time(recent <- filter(batting, yearID > 2010))
# system.time(collect(recent))
#
# # Group by operations -------------------------------------------------------
# # To perform operations by group, create a grouped object with group_by
# players <- group_by(batting, playerID)
# group_size(players)
#
# # sqlite doesn't support windowed functions, which means that only
# # grouped summaries are really useful:
# summarise(players, mean_g = mean(G), best_ab = max(AB))
#
# # When you group by multiple level, each summarise peels off one level
# per_year <- group_by(batting, playerID, yearID)
# stints <- summarise(per_year, stints = max(stint))
# filter(ungroup(stints), stints > 3)
# summarise(stints, max(stints))
#
# # Joins ---------------------------------------------------------------------
# player_info <- select(tbl(lahman_s, "Master"), playerID, birthYear)
# hof <- select(filter(tbl(lahman_s, "HallOfFame"), inducted == "Y"),
# playerID, votedBy, category)
#
# # Match players and their hall of fame data
# inner_join(player_info, hof)
# # Keep all players, match hof data where available
# left_join(player_info, hof)
# # Find only players in hof
# semi_join(player_info, hof)
# # Find players not in hof
# anti_join(player_info, hof)
#
# # Arbitrary SQL -------------------------------------------------------------
# # You can also provide sql as is, using the sql function:
# batting2008 <- tbl(lahman_s,
# sql("SELECT * FROM Batting WHERE YearID = 2008"))
# batting2008
# }
## ---------------------------------------------
Run the code above in your browser using DataCamp Workspace