data("Batting", package = "Lahman")
data("Master", package = "Lahman")
batting_dt <- tbl_dt(Batting)
person_dt <- tbl_dt(Master)
# Inner join: match batting and person data
inner_join(batting_dt, person_dt)
# Left join: keep batting data even if person missing
left_join(batting_dt, person_dt)
# Semi-join: find batting data for top 4 teams, 2010:2012
grid <- expand.grid(
teamID = c("WAS", "ATL", "PHI", "NYA"),
yearID = 2010:2012)
top4 <- semi_join(batting_dt, grid, copy = TRUE)
# Anti-join: find batting data with out player data
anti_join(batting_dt, person_dt)
Run the code above in your browser using DataLab