# NOT RUN {
####################################
# Basic career summaries by manager
####################################
library("dplyr")
# Career totals for every manager: one row per playerID.
mgrSumm <- Managers %>%
  group_by(playerID) %>%
  summarise(
    nyear = n_distinct(yearID),      # distinct seasons with a managing stint
    yearBegin = min(yearID),
    yearEnd = max(yearID),
    nTeams = n_distinct(teamID),
    # Number of stints that finished in first place. na.rm = TRUE so that
    # a stint with a missing rank does not turn the whole count into NA.
    nfirst = sum(rank == 1L, na.rm = TRUE),
    W = sum(W),
    L = sum(L),
    # W and L on this line are the career sums defined just above,
    # not the per-stint columns of Managers.
    WinPct = round(W / (W + L), 3)
  )

# Name lookup keyed by playerID (rows without an ID cannot be joined)
MgrInfo <- People %>%
  filter(!is.na(playerID)) %>%
  select(playerID, nameLast, nameFirst)

# Merge names into the table. right_join() keeps every row of mgrSumm;
# a manager with no match in MgrInfo gets NA names rather than being dropped.
# Then add total games managed, counted as decisions (wins + losses).
mgrTotals <- right_join(MgrInfo, mgrSumm, by = "playerID") %>%
  mutate(games = W + L)
##########################
# Some basic queries
##########################
# The 20 managers with the most seasons managed:
mgrTotals %>%
  arrange(desc(nyear)) %>%
  head(n = 20)
# Top 20 managers by career winning percentage,
# limited to those with at least 500 games (games = W + L)
mgrTotals %>%
  filter(games >= 500) %>%
  arrange(desc(WinPct)) %>%
  head(n = 20)
# Most of these are 19th century managers.
# Same ranking for the modern era (careers that began in 1901 or later):
mgrTotals %>%
  filter(yearBegin >= 1901, games >= 500) %>%
  arrange(desc(WinPct)) %>%
  head(n = 20)
# Top 10 managers by share of first-place finishes
# (league or divisional), i.e. nfirst / nyear rounded to 3 places.
# Expect a bias toward post-1970 managers, since more
# first-place finishes are available in the divisional era.
mgrTotals %>%
  filter(yearBegin >= 1901, games >= 500) %>%
  arrange(desc(round(nfirst / nyear, 3))) %>%
  head(n = 10)
# Same ranking for careers that also ended no later than 1969:
mgrTotals %>%
  filter(yearBegin >= 1901, yearEnd <= 1969, games >= 500) %>%
  arrange(desc(round(nfirst / nyear, 3))) %>%
  head(n = 10)
## Tony LaRussa's managerial record, one summary row per team
filter(Managers, playerID == "larusto01") %>%
  group_by(teamID) %>%
  summarise(
    nyear = n_distinct(yearID),
    yearBegin = min(yearID),
    yearEnd = max(yearID),
    games = sum(G),
    nfirst = sum(rank == 1L),
    W = sum(W),
    L = sum(L),
    # uses the per-team W and L totals computed on the two lines above
    WinPct = round(W / (W + L), 3)
  )
##############################################
# Density plot of the number of games managed:
##############################################
library("ggplot2")
ggplot(data = mgrTotals, mapping = aes(x = games)) +
  geom_density(alpha = 0.3, fill = "red") +
  xlab("Number of games managed")
# Managers with at least 4000 games, most games first:
mgrTotals %>%
  filter(games >= 4000) %>%
  arrange(desc(games))
# Connie Mack's advantage: he owned the Philadelphia A's :)
# Frequency table of Tony LaRussa's team finishes
# (one row per rank value, n = number of stints with that rank):
count(filter(Managers, playerID == "larusto01"), rank)
##############################################
# Scatterplot of winning percentage vs. number
# of games managed (min 100), for managers whose
# first season was 1900 or later
##############################################
mgrTotals %>%
  filter(yearBegin >= 1900, games >= 100) %>%
  ggplot(aes(x = games, y = WinPct)) +
  geom_point() +
  geom_smooth() +
  xlab("Number of games managed")
############################################
# Division titles
############################################
# First-place finishes against years managed, for managers who
# started in the divisional era (>= 1969) and have
# at least 8 years of experience.
# Points are jittered horizontally to separate overlapping managers.
mgrTotals %>%
  filter(yearBegin >= 1969, nyear >= 8) %>%
  ggplot(aes(x = nyear, y = nfirst)) +
  geom_point(position = position_jitter(width = 0.2)) +
  geom_smooth() +
  labs(x = "Number of years",
       y = "Number of divisional titles")
# Same managers, but with the response changed to the proportion
# of titles relative to years managed
mgrTotals %>%
  filter(yearBegin >= 1969, nyear >= 8) %>%
  mutate(propFirst = round(nfirst / nyear, 3)) %>%
  ggplot(aes(x = nyear, y = propFirst)) +
  geom_point(position = position_jitter(width = 0.2)) +
  geom_smooth() +
  labs(x = "Number of years",
       y = "Proportion of divisional titles")
# }
# Run the code above in your browser using DataLab