## See also additional material available in
## YOUR_R_DIR/library/mixAK/doc/
## or YOUR_R_DIR/site-library/mixAK/doc/
## - files Galaxy.pdf, Faithful.pdf, Tandmob.pdf
## Galaxy.R, Faithful.R, Tandmob.R
## ==============================================
## Simple analysis of Anderson's iris data
## ==============================================
library("colorspace")
data(iris, package="datasets")
summary(iris)
VARS <- names(iris)[1:4]
#COLS <- rainbow_hcl(3, start = 60, end = 240)
COLS <- c("red", "darkblue", "darkgreen")
names(COLS) <- levels(iris[, "Species"])
### Prior distribution and the length of MCMC
Prior <- list(priorK = "fixed", Kmax = 3)
nMCMC <- c(burn=5000, keep=10000, thin=5, info=1000)
### Run MCMC
set.seed(20091230)
fit <- NMixMCMC(y0 = iris[, VARS], prior = Prior, nMCMC = nMCMC)
### Basic posterior summary
print(fit)
### Univariate marginal posterior predictive densities
### based on chain #1
pdens1 <- NMixPredDensMarg(fit[[1]], lgrid=150)
plot(pdens1)
plot(pdens1, main=VARS, xlab=VARS)
### Bivariate (for each pair of margins) predictive densities
### based on chain #1
pdens2a <- NMixPredDensJoint2(fit[[1]])
plot(pdens2a)
plot(pdens2a, xylab=VARS)
plot(pdens2a, xylab=VARS, contour=TRUE)
### Determine the grid to compute bivariate densities
grid <- list(Sepal.Length=seq(3.5, 8.5, length=75),
Sepal.Width=seq(1.8, 4.5, length=75),
Petal.Length=seq(0, 7, length=75),
Petal.Width=seq(-0.2, 3, length=75))
pdens2b <- NMixPredDensJoint2(fit[[1]], grid=grid)
plot(pdens2b, xylab=VARS)
### Plot with contours
ICOL <- rev(heat_hcl(20, c=c(80, 30), l=c(30, 90), power=c(1/5, 2)))
oldPar <- par(mfrow=c(2, 3), bty="n")
for (i in 1:3){
for (j in (i+1):4){
NAME <- paste(i, "-", j, sep="")
MAIN <- paste(VARS[i], "x", VARS[j])
image(pdens2b$x[[i]], pdens2b$x[[j]], pdens2b$dens[[NAME]], col=ICOL,
xlab=VARS[i], ylab=VARS[j], main=MAIN)
contour(pdens2b$x[[i]], pdens2b$x[[j]], pdens2b$dens[[NAME]], add=TRUE, col="brown4")
}
}
### Plot with data
for (i in 1:3){
for (j in (i+1):4){
NAME <- paste(i, "-", j, sep="")
MAIN <- paste(VARS[i], "x", VARS[j])
image(pdens2b$x[[i]], pdens2b$x[[j]], pdens2b$dens[[NAME]], col=ICOL,
xlab=VARS[i], ylab=VARS[j], main=MAIN)
for (spec in levels(iris[, "Species"])){
Data <- subset(iris, Species==spec)
points(Data[,i], Data[,j], pch=16, col=COLS[spec])
}
}
}
### Set the graphical parameters back to their original values
par(oldPar)
### Clustering based on posterior summary statistics of component allocations
### or on the posterior distribution of component allocations
### (these are two equivalent estimators of probabilities of belonging
### to each mixture components for each observation)
p1 <- fit[[1]]$poster.comp.prob1
p2 <- fit[[1]]$poster.comp.prob2
### Clustering based on posterior summary statistics of mixture weight, means, variances
p3 <- NMixPlugDA(fit[[1]], iris[, VARS])
p3 <- p3[, paste("prob", 1:3, sep="")]
### Observations from "setosa" species (all would be allocated in component 1)
apply(p1[1:50,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p2[1:50,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p3[1:50,], 2, quantile, prob=seq(0, 1, by=0.1))
### Observations from "versicolor" species (almost all would be allocated in component 2)
apply(p1[51:100,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p2[51:100,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p3[51:100,], 2, quantile, prob=seq(0, 1, by=0.1))
### Observations from "virginica" species (all would be allocated in component 3)
apply(p1[101:150,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p2[101:150,], 2, quantile, prob=seq(0, 1, by=0.1))
apply(p3[101:150,], 2, quantile, prob=seq(0, 1, by=0.1))
Run the code above in your browser using DataLab