# NOT RUN {
#### Example using Diabetes dataset ####
# Attach the package first: UnsupervisedModelParams and KmeansClustering are
# used below, and without this call the example fails with "object not found"
# when it is run as a stand-alone script in a fresh R session.
library(healthcareai)

# Start a timer so the elapsed time of the example can be printed at the end.
ptm <- proc.time()

# Locate the sample CSV that ships with the package. In your own work, delete
# this lookup and set csvfile to 'your/path' instead.
csvfile <- system.file("extdata",
                       "HCRDiabetesClinical.csv",
                       package = "healthcareai")

# Read the data, treating "NULL", "NA" and empty strings as missing values.
df <- read.csv(file = csvfile,
               header = TRUE,
               na.strings = c("NULL", "NA", ""))
head(df)

# Drop the PatientID column before clustering.
df$PatientID <- NULL

# Fix the random seed so repeated runs draw the same random numbers.
set.seed(42)

# Collect the settings for the clustering run.
# NOTE(review): the per-field remarks below are inferred from the field
# names -- confirm against the UnsupervisedModelParams documentation.
p <- UnsupervisedModelParams$new()
p$df <- df
p$impute <- TRUE                    # presumably fills in missing values
p$grainCol <- "PatientEncounterID"  # presumably the row-identifier column
p$debug <- FALSE
p$cores <- 1
p$numOfClusters <- 3

# Run k means clustering
cl <- KmeansClustering$new(p)
cl$run()

# Get the 2D representation of the cluster solution
cl$get2DClustersPlot()

# Get the output data frame
dfOut <- cl$getOutDf()
head(dfOut)

# Report the time elapsed since the timer was started.
print(proc.time() - ptm)
#### Example using iris dataset with labels ####
# Time the whole example, including attaching the package.
ptm <- proc.time()
library(healthcareai)

# Load the built-in iris data set and preview its first rows.
data("iris")
head(iris)

# Seed the random number generator so the run is repeatable.
set.seed(2017)

# Build the parameter object for the clustering run.
# NOTE(review): labelCol presumably names the column holding the known
# labels -- confirm against the UnsupervisedModelParams documentation.
p <- UnsupervisedModelParams$new()
p$df <- iris
p$labelCol <- "Species"
p$impute <- TRUE
p$debug <- FALSE
p$cores <- 1

# Create the k-means object from the parameters and execute it.
cl <- KmeansClustering$new(p)
cl$run()

# Draw the two-dimensional view of the resulting clusters.
cl$get2DClustersPlot()

# Fetch the output data frame and preview it.
dfOut <- cl$getOutDf()
head(dfOut)

## The result can be saved with plain R, e.g. to CSV (or JSON, MySQL, etc):
## write.csv(dfOut,'path/clusteringresult.csv')

# Print how long the example took.
print(proc.time() - ptm)
# }
# Run the code above in your browser using DataLab