# Interactive walk-through of the showData() plot formats (boxplot,
# correlation matrix, histogram, overview, scatterplot) against the Lahman
# baseball database in Aster. Guarded by interactive() because every step
# needs a live database connection.
if (interactive()) {
  # initialize connection to Lahman baseball database in Aster;
  # the connection string is assembled with paste0() so that no line break
  # ends up inside the string handed to the ODBC driver
  conn <- odbcDriverConnect(
    connection = paste0(
      "driver={Aster ODBC Driver};",
      "server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>"
    )
  )

  # get summaries once to save time; pitchingInfo is reused by the plots
  # below (battingInfo is not referenced again in this script)
  pitchingInfo <- getTableSummary(conn, "pitching_enh")
  battingInfo <- getTableSummary(conn, "batting_enh")

  # NOTE: every showData() result is wrapped in print(). Inside a braced
  # block only the value of the last expression is auto-printed, so without
  # print() all plots but the final one would be silently discarded.

  # Boxplots ----
  # all numerical attributes
  print(showData(conn, tableInfo = pitchingInfo, format = "boxplot",
                 title = "Boxplots of numeric columns"))

  # select certain attributes only
  print(showData(conn, tableInfo = pitchingInfo, format = "boxplot",
                 include = c("wp", "whip", "w", "sv", "sho", "l", "ktobb",
                             "ibb", "hbp", "fip", "era", "cg", "bk", "baopp"),
                 useIQR = TRUE, title = "Boxplots of Pitching Stats"))

  # exclude certain attributes
  print(showData(conn, tableInfo = pitchingInfo, format = "boxplot",
                 except = c("item_id", "ingredient_item_id", "facility_id",
                            "rownum", "decadeid", "yearid", "bfp", "ipouts"),
                 useIQR = TRUE, title = "Boxplots of Pitching Stats"))

  # flip coordinates
  print(showData(conn, tableInfo = pitchingInfo, format = "boxplot",
                 except = c("item_id", "ingredient_item_id", "facility_id",
                            "rownum", "decadeid", "yearid", "bfp", "ipouts"),
                 useIQR = TRUE, coordFlip = TRUE,
                 title = "Boxplots of Pitching Stats"))

  # boxplot with facet (facet_wrap)
  print(showData(conn, tableInfo = pitchingInfo, format = "boxplot",
                 include = c("bfp", "er", "h", "ipouts", "r", "so"),
                 facet = TRUE, scales = "free", useIQR = TRUE,
                 title = "Boxplots Pitching Stats: bfp, er, h, ipouts, r, so"))

  # Correlation matrix ----
  # on all numerical attributes
  print(showData(conn, tableName = "pitching_enh", tableInfo = pitchingInfo,
                 format = "corr"))

  # correlation matrix on selected attributes
  # with labeling by attribute pair name and
  # controlling size of correlation bubbles
  # (tableName matches the table pitchingInfo was summarized from)
  print(showData(conn, tableName = "pitching_enh", tableInfo = pitchingInfo,
                 include = c("era", "h", "hr", "gs", "g", "sv"),
                 format = "corr", corrLabel = "pair",
                 shapeSizeRange = c(5, 25)))

  # Histogram ----
  # histogram of a single numeric attribute (hr)
  print(showData(conn, tableName = "pitching_enh", tableInfo = pitchingInfo,
                 include = c("hr"), format = "histogram"))

  # Overview ----
  # overview is a histogram of statistical measures across attributes
  print(showData(conn, tableName = "pitching_enh", tableInfo = pitchingInfo,
                 format = "overview", type = "numeric", scales = "free_y"))

  # Scatterplots ----
  # scatterplot on pair of numerical attributes
  # sample by size with 1d facet (see facet_wrap)
  print(showData(conn, "pitching_enh", format = "scatterplot",
                 include = c("so", "er"), facetName = "lgid",
                 pointColour = "lgid", sampleSize = 10000,
                 regressionLine = TRUE,
                 title = "SO vs ER by League 1980-2000",
                 where = "yearid between 1980 and 2000"))

  # sample by fraction with 2d facet (see facet_grid)
  print(showData(conn, "pitching_enh", format = "scatterplot",
                 include = c("so", "er"), facetName = c("lgid", "decadeid"),
                 pointColour = "lgid", sampleFraction = 0.1,
                 regressionLine = TRUE,
                 title = "SO vs ER by League by Decade 1980 - 2012",
                 where = "yearid between 1980 and 2012"))

  # release the ODBC connection now that all plots have been produced
  odbcClose(conn)
}
# Run the code above in your browser using DataLab