Learn R Programming

Lahman (version 4.0-1)

Schools: Schools table

Description

Information on schools players attended, by school

Usage

data(Schools)

Arguments

Format

A data frame with 749 observations on the following 5 variables.
schoolID
school ID code
name_full
school name
city
city where school is located
state
state where school's city is located
country
country where school is located

Source

Lahman, S. (2015) Lahman's Baseball Database, 1871-2014, 2015 version, http://baseball1.com/statistics/

Examples

Run this code

# Attach plyr for ddply(), summarise(), arrange() and desc() used below.
# library() stops with an error if plyr is not installed, whereas require()
# only warns and returns FALSE, deferring the failure to the first ddply() call.
# NOTE(review): Schools and CollegePlaying are presumably supplied by the
# Lahman package, which must already be attached -- confirm.
library(plyr)

# How many different schools are listed in each state?
table(Schools$state)

# How many different schools are listed in each country?
table(Schools$country)

# Top 20 schools
# Keep only the identifying and location columns of Schools.
schoolInfo <- Schools[c("schoolID", "name_full", "city", "state")]

# One row per schoolID, with `players` = number of CollegePlaying rows
# recorded for that school.
# NOTE(review): this is a row count; if CollegePlaying holds more than one
# row per player and school, it is not a count of distinct players -- confirm.
schoolCount <- ddply(
  .data = CollegePlaying,
  .variables = .(schoolID),
  .fun = summarise,
  players = length(schoolID)
)

# Attach name and location; all.x = TRUE keeps schools that have no match
# in schoolInfo (their name/city/state become NA).
schoolCount <- merge(
  x = schoolCount,
  y = schoolInfo,
  by = "schoolID",
  all.x = TRUE
)

# Arrange in decreasing order:
schoolCount <- arrange(schoolCount, desc(players))
head(schoolCount, n = 20)

# Sum counts by state: for every state, total the per-school `players`
# values and count how many schoolCount rows (schools) fall in that state.
schoolStates <- ddply(
  .data = schoolCount,
  .variables = .(state),
  .fun = summarise,
  players = sum(players),
  schools = length(state)
)

# Inspect the structure, then the column-wise summary, of the result.
str(schoolStates)
summary(schoolStates)

## Not run: 
# if(require(zipcode)) {
#   # in lieu of more precise geocoding via the school name (name_full),
#   # find lat/long of Schools from zipcode file
#   zips <- ddply(zipcode, .(city, state), summarize,
#                 latitude=mean(latitude), longitude=mean(longitude))
#   colnames(zips)[1:2] <- c("city", "state")
#   str(zips)
# 
#   # merge lat/long from zips
#   schoolsXY <- merge(Schools, zips, by=c("city", "state"), all.x=TRUE)
#   str(schoolsXY)
# 
#   # plot school locations
#   with(subset(schoolsXY, state != 'HI'),
#     plot(jitter(longitude), jitter(latitude))
#   )
# }
# ## End(Not run)

Run the code above in your browser using DataLab