# NOT RUN {
# Example workflow: cluster the data, train a self-organizing map, split the
# map districts into three subgroups by urinary albumin, and print summary
# statistics per subgroup.
# NOTE(review): the nro* functions are called unqualified and no library()
# call is visible here; presumably the Numero package is attached beforehand.

# Locate the example data file bundled with the package and read it.
fname <- system.file("extdata", "finndiane.txt", package = "Numero")
dataset <- read.delim(fname)

# Select the training variables and standardize them.
trvars <- c("CHOL", "HDL2C", "TG", "CREAT", "uALB")
trdata <- scale.default(dataset[, trvars])

# Run K-means clustering on the training data.
km <- nroKmeans(data = trdata)

# Create a self-organizing map seeded by the K-means result, then train it.
sm <- nroKohonen(seeds = km)
sm <- nroTrain(map = sm, data = trdata)

# Match each data point to a district of the trained map.
matches <- nroMatch(centroids = sm, data = trdata)

# Aggregate urinary albumin over the map districts and keep the first
# element of the result as a plain vector.
plane <- nroAggregate(
  topology = sm,
  districts = matches,
  data = dataset$uALB
)
plane <- as.vector(plane[[1]])

# Split districts into three subgroups at the 33% and 67% quantiles.
# Order matters: the lower cut-off is applied last so that it overrides
# the "MiddleAlb" assignment for the lowest districts.
regns <- rep_len("HighAlb", length(plane))
regns[plane < quantile(plane, 0.67)] <- "MiddleAlb"
regns[plane < quantile(plane, 0.33)] <- "LowAlb"

# Build the region data frame and attach a one-letter label per subgroup.
regns <- data.frame(
  REGION = regns,
  REGION.label = "",
  stringsAsFactors = FALSE
)
regns$REGION.label <- unname(
  c(HighAlb = "H", MiddleAlb = "M", LowAlb = "L")[regns$REGION]
)

# Compute the summary statistics and show the columns of interest.
st <- nroSummary(data = dataset, districts = matches, regions = regns)
print(st[, c("VARIABLE", "SUBGROUP", "MEAN", "P.chisq", "P.t", "P.anova")])
# }
# Run the code above in your browser using DataLab