# Example: k-means clustering of the Lahman "batting" table stored in Aster.
# Runs only in an interactive session because it needs a live database.
if (interactive()) {
  # Initialize connection to the Lahman baseball database in Aster.
  # The connection string is assembled with paste0() so that no line break
  # ends up embedded in the string handed to the ODBC driver.
  conn <- odbcDriverConnect(
    connection = paste0(
      "driver={Aster ODBC Driver};",
      "server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>"
    )
  )
  # odbcDriverConnect() signals failure by returning -1 rather than an
  # RODBC channel, so stop here instead of failing later with a vaguer error.
  if (!inherits(conn, "RODBC")) {
    stop("Could not open the ODBC connection to Aster.", call. = FALSE)
  }

  # Everything that uses the connection runs inside tryCatch() so the
  # channel is closed even when one of the steps below fails.
  tryCatch({
    km <- computeKmeans(
      conn, "batting",
      centers = 5, iterMax = 25,
      aggregates = c("COUNT(*) cnt", "AVG(g) avg_g", "AVG(r) avg_r",
                     "AVG(h) avg_h"),
      id = "playerid || '-' || stint || '-' || teamid || '-' || yearid",
      include = c("g", "r", "h"),
      scaledTableName = "kmeans_test_scaled",
      centroidTableName = "kmeans_test_centroids",
      where = "yearid > 2000"
    )
    # Values inside braces are not auto-printed by R; only the last
    # expression of the whole block would be shown. Print explicitly.
    print(km)
    # NOTE(review): the create*Plot() helpers are assumed to return plot
    # objects that are rendered by print() - confirm against the package docs.
    print(createCentroidPlot(km))
    print(createClusterPlot(km))

    # Persist clustered data.
    kmc <- computeKmeans(
      conn, "batting",
      centers = 5, iterMax = 250,
      aggregates = c("COUNT(*) cnt", "AVG(g) avg_g", "AVG(r) avg_r",
                     "AVG(h) avg_h"),
      id = "playerid || '-' || stint || '-' || teamid || '-' || yearid",
      include = c("g", "r", "h"),
      persist = TRUE,
      scaledTableName = "kmeans_test_scaled",
      centroidTableName = "kmeans_test_centroids",
      clusteredTableName = "kmeans_test_clustered",
      tempTableName = "kmeans_test_temp",
      where = "yearid > 2000"
    )
    print(createCentroidPlot(kmc))
    print(createCentroidPlot(kmc, format = "bar_dodge"))
    print(createCentroidPlot(kmc, format = "heatmap", coordFlip = TRUE))
    print(createClusterPlot(kmc))

    # Sample the clustered data (second argument 0.01) and plot the
    # sampled points pairwise.
    kmc <- computeClusterSample(conn, kmc, 0.01)
    print(createClusterPairsPlot(kmc, title = "Batters Clustered by G, H, R",
                                 ticks = FALSE))

    # Compute silhouette information and plot its per-cluster profile.
    kmc <- computeSilhouette(conn, kmc)
    print(createSilhouetteProfile(
      kmc, title = "Cluster Silhouette Histograms (Profiles)"
    ))
  }, finally = odbcClose(conn))
}
# Run the code above in your browser using DataLab