Learn R Programming

Lahman (version 2.0-1)

Schools: Schools table

Description

Information on schools players attended, by school

Usage

data(Schools)

Source

Lahman, S. (2010) Lahman's Baseball Database, 1871-2012, 2012 version, http://baseball1.com/statistics/

Examples

Run this code
# plyr supplies ddply(), summarise(), arrange() and desc() used below.
# library() stops with a clear error when plyr is not installed, whereas
# require() would only warn and let the first ddply() call fail instead.
library(plyr)

# how many different schools are listed in each state?
table(Schools$schoolState)

# top 20 schools
schoolInfo <- Schools[, c("schoolID", "schoolName", "schoolCity", "schoolState")]

# count the rows of SchoolsPlayers for each schoolID
schoolCount <- ddply(SchoolsPlayers, .(schoolID), summarise,
                     players = length(schoolID))
# all.x = TRUE keeps every counted schoolID, even one without a row in Schools
schoolCount <- merge(schoolCount, schoolInfo, by = "schoolID", all.x = TRUE)

# Arrange in decreasing order:
schoolCount <- arrange(schoolCount, desc(players))
head(schoolCount, 20)

# sum counts by state
schoolStates <- ddply(schoolCount, .(schoolState), summarise,
                      players = sum(players),
                      schools = length(schoolState))
str(schoolStates)
summary(schoolStates)

# Optional part of the example: runs only when the zipcode package can be
# attached. require() returns FALSE (rather than erroring) when it cannot,
# so the whole block is skipped in that case.
if (require(zipcode)) {
  # in lieu of more precise geocoding via schoolName,
  # find lat/long of Schools from zipcode file
  # (one row per city/state, averaging the coordinates of its zip codes)
  zips <- ddply(zipcode, .(city, state), summarise,
                latitude = mean(latitude),
                longitude = mean(longitude))
  # rename the grouping columns so they match the key columns in Schools
  colnames(zips)[1:2] <- c("schoolCity", "schoolState")
  str(zips)

  # merge lat/long from zips
  # all.x = TRUE keeps schools whose city/state has no match in zips
  schoolsXY <- merge(Schools, zips,
                     by = c("schoolCity", "schoolState"), all.x = TRUE)
  str(schoolsXY)

  # plot school locations
  # rows with schoolState 'HI' are left out of the plot
  with(subset(schoolsXY, schoolState != 'HI'),
       plot(jitter(longitude), jitter(latitude)))
}

Run the code above in your browser using DataLab