# \donttest{
# Worked example: Projection Pursuit on data nuggets.
# Steps: simulate data -> scale -> create and refine data nuggets ->
# weighted spherization -> optimize the projection index -> varimax
# rotation -> plots of the nuggets, the raw data and the loadings.

# library() stops right away with a clear error if a package is missing;
# require() would only return FALSE and let the script fail further down.
library(datanugget)
library(rstiefel)

# Fix the RNG seed so the simulated data below is the same on every run.
# NOTE(review): the nugget steps run with no.cores = 2; whether their results
# are fully determined by this seed is not visible here -- confirm.
set.seed(1)

# 4-dim small example with cluster structures in V3 and V4
X <- cbind.data.frame(
  V1 = rnorm(5 * 10^3, mean = 5, sd = 2),
  V2 = rnorm(5 * 10^3, mean = 5, sd = 1),
  # V3: two groups (3000 + 2000 observations)
  V3 = c(
    rnorm(3 * 10^3, sd = 0.3),
    rnorm(2 * 10^3, mean = 2, sd = 0.3)
  ),
  # V4: three groups (1000 + 3000 + 1000 observations)
  V4 = c(
    rnorm(1 * 10^3, mean = -8, sd = 1),
    rnorm(3 * 10^3, mean = 0, sd = 1),
    rnorm(1 * 10^3, mean = 7, sd = 1.5)
  )
)

# It is recommended to scale the raw data first before generating
# data nuggets for Projection Pursuit
X <- as.data.frame(scale(X))

# create data nuggets
my.DN <- create.DN(
  x = X,
  R = 500,
  delete.percent = .1,
  DN.num1 = 500,
  DN.num2 = 250,
  no.cores = 2,
  make.pbs = FALSE
)

# refine data nuggets
my.DN2 <- refine.DN(
  x = X,
  DN = my.DN,
  EV.tol = .9,
  min.nugget.size = 2,
  max.splits = 5,
  no.cores = 2,
  make.pbs = FALSE
)

# get nugget centers, weights, and scales
# (columns 2 .. ncol(X) + 1 of the nugget table are taken as the centers)
nugg <- my.DN2$`Data Nuggets`[, 2:(ncol(X) + 1)]
weight <- my.DN2$`Data Nuggets`$Weight
# stored as nugg_scale so the global does not reuse the name of base::scale()
nugg_scale <- my.DN2$`Data Nuggets`$Scale

# spherize data nugget centers, taking the weights into account,
# to conduct Projection Pursuit
wsph.res <- wsph(nugg, weight)
nugg_wsph <- wsph.res$data_wsph
wsph_proj <- wsph.res$wsph_proj

# conduct the same spherization projection on the standardized raw data:
# subtract wsph.res$wmean from every row, then apply wsph_proj
X_cen <- X - as.matrix(rep(1, nrow(X))) %*% wsph.res$wmean
# X_sph is not used further below; it is kept for inspection
X_sph <- as.matrix(X_cen) %*% wsph_proj

# conduct Projection Pursuit in 2-dim by optimizing Natural Hermite index
res <- PPnuggOptim(
  NHnugg,
  nugg_wsph,
  dimproj = 2,
  weight = weight,
  scale = nugg_scale
)

# optimal projection matrix obtained
proj_opt <- res$proj.opt

# plot projected data nuggets
plotNugg(nugg_wsph %*% proj_opt, weight, qt = 0.8)

# conduct varimax rotation for projection
fa <- faProj(nugg, weight, proj = proj_opt)

# obtain rotated projected data nuggets and
# corresponding loadings of original variables
nuggproj_rotat <- fa$nuggproj_rotat
loadings <- fa$loadings

# plot rotated projected data nuggets after varimax rotation
plotNugg(nuggproj_rotat, weight, qt = 0.8)

# plot corresponding projected raw big data after factor rotation
X_proj <- as.matrix(X_cen) %*% loadings
plot(X_proj, cex = 0.5)

# plot loadings of original variables
# V3 and V4 are expected to have large loadings, matching the
# simulation setting.
plotLoadings(loadings)
# }
# Run the code above in your browser using DataLab