# Example: variable selection on simulated data.
# The data set has 100 units (two hidden clusters of 50 units each),
# 10 "true" variables that separate the clusters, and 10 masking
# variables that carry no cluster information.
# NOTE(review): PrtDist(), qVarSelH() and qVarSelLP() are not defined in this
# script; the package that provides them must already be attached.

# Fail fast if the LP solver interface is unavailable; require() would only
# return FALSE and defer the failure to a later call.
library(lpSolveAPI)

n1 <- 50
n2 <- 50
n_true_var <- 10
n_mask_var <- 10

# True variables: the two groups differ only in their mean (0 vs 2).
g1 <- matrix(rnorm(n1 * n_true_var, 0, 1), ncol = n_true_var)
g2 <- matrix(rnorm(n2 * n_true_var, 2, 1), ncol = n_true_var)

# Masking variables: uniform noise on [0, 5], drawn for all units at once.
m1 <- matrix(
  runif((n1 + n2) * n_mask_var, min = 0, max = 5),
  ncol = n_mask_var
)

# Full data matrix: units in rows, true variables first, then masking ones.
a <- cbind(rbind(g1, g2), m1)

## Calculate data prototypes using k-means (2 centers, 2 random starts)
sl2 <- kmeans(a, 2, iter.max = 100, nstart = 2)
p <- sl2$centers

## Calculate distances between observations and prototypes
## Remark: d is a 3-dimensional array
d <- PrtDist(a, p)

## Select the 10 most representative variables using the heuristic
lsH <- qVarSelH(d, 10, maxit = 200)

## Select 10 variables using the linear relaxation
lsC <- qVarSelLP(d, 10)

## Check optimality: the heuristic is reported as optimal when its objective
## value matches the linear-relaxation objective within a 0.001 tolerance.
if (abs(lsH$obj - lsC$obj) < 0.001) {
  # Emit the text via message(); the previous `message = "..."` form only
  # created a variable called `message` and printed nothing.
  message("Heuristic Solution is Optimal")
}
# Run the code above in your browser using DataLab