# Collect PITCHf/x (and other data from inning_all.xml files) from
# all games played on August 1st, 2013 (using asynchronous downloads)
dat <- scrape(start = "2013-08-01", end = "2013-08-01")
#As of XML2R 0.0.5, asyncronous downloads can be performed
dat <- scrape(start = "2013-08-01", end = "2013-08-01", async = TRUE)
# Scrape PITCHf/x from Minnesota Twins 2011 season
data(gids, package="pitchRx")
twins11 <- gids[grepl("min", gids) & grepl("2011", gids)]
dat <- scrape(game.ids=twins11[1]) #scrapes 1st game only
# Create SQLite database, then collect and store data in that database
library(dplyr)
my_db <- src_sqlite("Gameday.sqlite3")
scrape(start = "2013-08-01", end = "2013-08-01", connect=my_db$con)
# Collect other data complementary to PITCHf/x and store in database
files <- c("inning/inning_hit.xml", "miniscoreboard.xml", "players.xml")
scrape(start = "2013-08-01", end = "2013-08-01", connect=my_db$con, suffix = files)
# Simple example to demonstrate database query using dplyr
# Note that 'num' and 'gameday_link' together make a key that allows us to join these tables
locations <- select(tbl(my_db, "pitch"), px, pz, des, num, gameday_link)
names <- select(tbl(my_db, "atbat"), pitcher_name, batter_name, num, gameday_link)
que <- inner_join(locations, filter(names, batter_name == "Paul Goldschmidt"),
by = c("num", "gameday_link"))
que$query #refine sql query if you'd like
pitchfx <- collect(que) #submit query and bring data into RRun the code above in your browser using DataLab