# Collect PITCHf/x (and other data from inning_all.xml files) from
# all games played on August 1st, 2013 (using asynchronous downloads)
dat <- scrape(start = "2013-08-01", end = "2013-08-01")
# As of XML2R 0.0.5, asynchronous downloads can be performed
dat <- scrape(start = "2013-08-01", end = "2013-08-01", async = TRUE)
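# A quick look at the result (a sketch, not part of the original example):
# scrape() returns a named list of data frames, one per XML node type
# (e.g., "atbat" and "pitch")
names(dat)
dim(dat$pitch)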
# Scrape PITCHf/x from Minnesota Twins 2011 season
data(gids, package = "pitchRx")
twins11 <- gids[grepl("min", gids) & grepl("2011", gids)]
dat <- scrape(game.ids = twins11)
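# Quick sanity check (not in the original example): Gameday IDs follow the
# pattern "gid_YYYY_MM_DD_awaymlb_homemlb_1", so confirm the subset before
# kicking off a long scrape
length(twins11)
head(twins11, 3)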
# Create SQLite database, then collect and store data in that database
library(dplyr)
# (recent dplyr versions delegate src_sqlite() to the dbplyr package)
my_db <- src_sqlite("Gameday.sqlite3", create = TRUE)
scrape(start = "2013-08-01", end = "2013-08-01", connect = my_db$con)
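# Sketch: verify which tables scrape() created in the database (the query
# example further below assumes tables named "pitches" and "atbats")
DBI::dbListTables(my_db$con)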
# Collect other data complementary to PITCHf/x and store in database
files <- c("inning/inning_hit.xml", "miniscoreboard.xml", "players.xml")
scrape(start = "2013-08-01", end = "2013-08-01", connect = my_db$con, suffix = files)
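# Sketch (assuming the connection above is still open): count the rows
# stored in each table to confirm the extra files were parsed and stored
for (tb in DBI::dbListTables(my_db$con)) {
  n <- DBI::dbGetQuery(my_db$con, paste0("SELECT COUNT(*) AS n FROM ", tb))$n
  message(tb, ": ", n, " rows")
}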
# Simple example to demonstrate database query using dplyr
# Note that 'num' and 'url' together make a key that allows us to join these tables
locations <- select(tbl(my_db, "pitches"), px, pz, des, num, url)
names <- select(tbl(my_db, "atbats"), pitcher_name, batter_name, num, url)
que <- inner_join(locations, filter(names, batter_name == "Paul Goldschmidt"),
                  by = c("num", "url"))
que$query # inspect (and refine) the underlying SQL query, if you'd like
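# In more recent versions of dplyr/dbplyr the query is no longer exposed as
# a list element; show_query() is the supported way to print the SQL
show_query(que)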
pitchfx <- collect(que) # submit the query and bring the data into R
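# A possible follow-up (not part of the original example): plot the
# collected pitch locations with ggplot2, colored by pitch outcome;
# px and pz are PITCHf/x plate-crossing coordinates, in feet
library(ggplot2)
ggplot(pitchfx, aes(x = px, y = pz, color = des)) +
  geom_point(alpha = 0.5) +
  coord_equal() +
  labs(x = "Horizontal location (ft)", y = "Height above ground (ft)")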