Learn R Programming

Lahman (version 4.0-1)

Pitching: Pitching table

Description

Pitching table: pitching statistics, one row per player, year and stint.

Usage

data(Pitching)

Arguments

Format

A data frame with 43330 observations on the following 30 variables.
playerID
Player ID code
yearID
Year
stint
Player's stint (order of appearances within a season)
teamID
Team; a factor
lgID
League; a factor with levels AA AL FL NL PL UA
W
Wins
L
Losses
G
Games
GS
Games Started
CG
Complete Games
SHO
Shutouts
SV
Saves
IPouts
Outs Pitched (innings pitched x 3)
H
Hits
ER
Earned Runs
HR
Home runs
BB
Walks
SO
Strikeouts
BAOpp
Opponent's Batting Average
ERA
Earned Run Average
IBB
Intentional Walks
WP
Wild Pitches
HBP
Batters Hit By Pitch
BK
Balks
BFP
Batters faced by Pitcher
GF
Games Finished
R
Runs Allowed
SH
Sacrifices by opposing batters
SF
Sacrifice flies by opposing batters
GIDP
Grounded into double plays by opposing batters

Source

Lahman, S. (2015) Lahman's Baseball Database, 1871-2014, 2015 version, http://baseball1.com/statistics/

Examples

Run this code
# Pitching data

# library() stops with an error if plyr is not installed; require() would
# only return FALSE and let the later mutate()/arrange()/ddply() calls fail.
library(plyr)

###################################
# cleanup, and add some other stats
###################################

# Restrict to AL and NL data, 1901+
# All data re SH, SF and GIDP are missing, so remove them. The columns are
# dropped by name rather than by position, so the result does not depend on
# the column order of the Pitching table.
pitching <- subset(Pitching,
                   yearID >= 1901 & lgID %in% c("AL", "NL"),
                   select = -c(SH, SF, GIDP))

# Approximate missing BAOpp values (most common remaining missing value).
# Only NA entries are filled in; recorded values are left untouched.
# The denominator BFP - BB - HBP approximates opponents' at-bats.
pitching$BAOpp <- with(pitching,
                       ifelse(is.na(BAOpp),
                              round(H / (BFP - BB - HBP), 3),
                              BAOpp))

# Compute WHIP (hits + walks per inning pitched -- lower is better) and
# KperBB (strikeouts per walk).
# Intentional walks (IBB) not recorded until 1955, so they are subtracted
# from BB only for 1955 onwards; earlier seasons use all walks.
# Rows with IPouts == 0 (or a zero walk count) yield Inf/NaN ratios.
pitching <- mutate(pitching,
                   WHIP = round((H + BB) * 3 / IPouts, 2),
                   KperBB = round(ifelse(yearID >= 1955,
                                         SO / (BB - IBB), SO / BB), 2))

#####################
# some simple queries
#####################

# 1993 Toronto Blue Jays pitching staff, listed from lowest to highest ERA
tor93 <- subset(pitching, teamID == "TOR" & yearID == 1993)
arrange(tor93, ERA)

# Every pitching record for Greg Maddux (player ID "maddugr01")
subset(pitching, playerID %in% "maddugr01")

# Ten lowest ERAs since 1946 among pitchers with at least 600 outs
# (200 innings), used here as the threshold for a starting pitcher
postwar <- subset(pitching, IPouts >= 600 & yearID >= 1946)
head(arrange(postwar, ERA), n = 10)

# Ten highest K/BB ratios since 1955 with the same 600-out threshold;
# the ratio is recomputed unrounded with intentional walks excluded
post55 <- mutate(subset(pitching, IPouts >= 600 & yearID >= 1955),
                 KperBB = SO / (BB - IBB))
head(arrange(post55, desc(KperBB)), n = 10)

# Ten highest K/BB ratios since 1950 among relievers (min. 20 saves)
head(
  arrange(
    subset(pitching, SV >= 20 & yearID >= 1950),
    desc(KperBB)
  ),
  n = 10
)

###############################################
# Winningest pitchers in each league each year:
###############################################

# Add name & throws information:
masterInfo <- Master[, c("playerID", "nameLast", "nameFirst", "throws")]

# Join on playerID explicitly instead of relying on merge()'s default of
# matching on every shared column name. all.x = TRUE keeps pitching rows
# that have no matching row in masterInfo (their added columns become NA).
pitching <- merge(pitching, masterInfo, by = "playerID", all.x = TRUE)

# Within each year/league group keep the row(s) whose W equals the group
# maximum; ties produce more than one row per group.
wp <- ddply(pitching, .(yearID, lgID), subset, W == max(W),
            select = c("playerID", "teamID", "W", "throws"))

# ANOVA table for a linear model of maximum wins on a quadratic in year,
# league and throwing hand
anova(lm(formula = W ~ yearID + I(yearID^2) + lgID + throws, data = wp))

# an eye-catching, but naive, specious graph

# library() fails immediately if ggplot2 is missing; require() would only
# return FALSE and the ggplot() call below would then fail instead.
library(ggplot2)

# compare loess smooth with quadratic fit
# Points: one per league-leading row in wp, coloured by throwing hand and
# shaped by league. Blue curve: loess smooth. Black curve: quadratic fit.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour
# of `linewidth`; `size` is kept here so the example still runs on older
# ggplot2 releases -- switch once the minimum version is confirmed.
ggplot(wp, aes(x = yearID, y = W)) +
  geom_point(aes(colour = throws, shape = lgID), size = 2) +
  geom_smooth(method = "loess", size = 1.5, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "black",
              formula = y ~ poly(x, 2)) +
  ylab("Maximum Wins") + xlab("Year") +
  ggtitle("Why can't pitchers win 30+ games any more?")


Run the code above in your browser using DataLab