computeClusterSample(channel, km, sampleFraction, sampleSize, scaled = FALSE, includeId = TRUE, test = FALSE)
odbcConnect.
"toakmeans" obtained with computeKmeans.
km object where vector length must be equal to the number of clusters.
sampleFraction is missing).
Multiple sizes define sampling for each cluster in the kmeans km object, where the vector length must be equal to the number of clusters.
computeClusterSample returns an object of class "toakmeans" (compatible with class "kmeans").
computeKmeans
# Example: sample rows from the clusters of a k-means model computed in Aster.
# Guarded by interactive() because it needs a live database connection.
if (interactive()) {
  # Connect to the Lahman baseball database in Aster. The connection string
  # literal continues on the following line, so that line must stay unindented.
  conn <- odbcDriverConnect(connection = "driver={Aster ODBC Driver};
server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>")

  # Cluster the batting table on g, r and h for rows with yearid > 2000.
  km <- computeKmeans(
    conn, "batting",
    centers = 5,
    iterMax = 25,
    aggregates = c(
      "COUNT(*) cnt",
      "AVG(g) avg_g",
      "AVG(r) avg_r",
      "AVG(h) avg_h"
    ),
    id = "playerid || '-' || stint || '-' || teamid || '-' || yearid",
    include = c("g", "r", "h"),
    scaledTableName = "kmeans_test_scaled",
    centroidTableName = "kmeans_test_centroids",
    where = "yearid > 2000"
  )

  # Sample with a single fraction (0.01), then inspect and plot the result.
  km <- computeClusterSample(conn, km, 0.01)
  km
  createClusterPairsPlot(
    km,
    title = "Batters Clustered by G, H, R",
    ticks = FALSE
  )

  # Per-cluster sample fractions: one value for each of the 5 clusters.
  km <- computeClusterSample(conn, km, c(0.01, 0.02, 0.03, 0.02, 0.01))
}
Run the code above in your browser using DataLab