# NOT RUN {
# This example simulates a dataset to demonstrate use of the autoencoder model
#Set the sample size to 1000 for the simulated dataset
n=1000
#Generate the simulated predictors using random functions
dataDf=data.frame(id=c(1:n),x1=runif(n),x2=rnorm(n,100,10),
                  x3=runif(n,100,200),x4=rnorm(n,1000,30))
#Set the proportion of the test samples
testProp=0.1
ntest=as.integer(n*testProp)
ntrain=n-ntest
#Obtain the indices for the training and test samples
index_train=sample(c(1:n),ntrain)
index_test=setdiff(c(1:n),index_train)
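#Sanity check (optional): the training and test indices should be disjoint,
#e.g. length(intersect(index_train, index_test)) returns 0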
#Obtain y as an analytic function of the predictors plus random noise
dataDf$y=sqrt(dataDf$x1)+dataDf$x2^0.3+log(dataDf$x3)+dataDf$x4^2+rnorm(n)
#Scale the dataset (columns 2 to 6: x1-x4 and y), keeping the centering and scaling values
scalev = scale(dataDf[,c(2:6)])
col_means = attr(scalev, "scaled:center")
col_stddevs = attr(scalev, "scaled:scale")
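#The saved centering and scaling values let new data be standardized on the
#same scale; a minimal sketch, assuming a hypothetical data frame newDf with
#the same columns:
#  newScaled = sweep(sweep(newDf[,c(2:6)], 2, col_means, "-"), 2, col_stddevs, "/")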
# }
# NOT RUN {
#Set the early stopping and learning rate adjustment callbacks
early_stopping = keras::callback_early_stopping(monitor ='loss', min_delta=0.000001)
reduce=keras::callback_reduce_lr_on_plateau(patience=20)
#Set the parameters
nfea=4;nout=1;nodes=c(32,16,8,4);mdropout=0.2;isres=TRUE;outtype=0
acts=rep("relu",length(nodes));fact="linear";reg=NULL;batchnorm=TRUE
#Define the residual autoencoder and show its network structure
autoresmodel=AutoEncoderModel(nfea,nout,nodes,acts,mdropout,reg,batchnorm,isres,outtype,fact=fact)
#summary(autoresmodel) #Optional function to show the model
#Define the loss function and compile the models
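#The custom metric below computes R-squared as 1 - SS_res/SS_tot, with
#keras::k_epsilon() added to the denominator to avoid division by zero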
metric_r2 = keras::custom_metric("rsquared", function(y_true, y_pred) {
  SS_res = keras::k_sum(keras::k_square(y_true - y_pred))
  SS_tot = keras::k_sum(keras::k_square(y_true - keras::k_mean(y_true)))
  return(1 - SS_res/(SS_tot + keras::k_epsilon()))
})
keras::compile(autoresmodel,
  loss = "mean_squared_error",
  optimizer = keras::optimizer_rmsprop(),
  metrics = c("mean_squared_error", metric_r2)
)
#Set the number of maximum epochs
nepoch=70
# Set up the training samples and train the model
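#Columns 1 to 4 of scalev hold the scaled predictors x1-x4; column 5 is the scaled y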
x_train=scalev[index_train,c(1:4)]
y_train=scalev[index_train,5]
history = keras::fit(autoresmodel, x_train, y_train,
  epochs = nepoch, batch_size = 20,
  validation_split = 0.2, verbose = 1,
  callbacks = list(early_stopping, reduce)
)
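#Optionally, the trained model could be saved for later reuse; a minimal
#sketch, assuming an HDF5 file path of your choice:
#  keras::save_model_hdf5(autoresmodel, "autoresmodel.h5")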
# Show the training curves
trainLoss=data.frame(r2=history$metrics$rsquared)
trainLoss$epoch=c(1:length(history$metrics$rsquared))
trainLoss$val_r2=history$metrics$val_rsquared
#Save the current par setting
curpar = par(no.readonly = TRUE)
#Set the new par setting and make the plots
par(mar=c(4,4,1,1))
plot(trainLoss$epoch, trainLoss$r2, type="l",
     xlab="Training epoch", ylab=expression("R"^2))
lines(trainLoss$epoch,trainLoss$val_r2,col="red")
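#Optionally, add a legend to distinguish the training and validation curves
legend("bottomright", legend=c("Training","Validation"), col=c("black","red"), lty=1)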
#Predict the test dataset
x_test=scalev[index_test,c(1:4)]
y_test=dataDf[index_test,"y"]
y_pred=predict(autoresmodel,x_test)
#Apply the inverse scaling to recover predictions on the original scale of y
y_pred=y_pred*col_stddevs[5]+col_means[5]
#Show the test results (the residuals y_test - y_pred are passed to rSquared)
test_r2=rSquared(y_test,y_test-y_pred)
test_rmse=rmse(y_test,y_pred)
message(paste("test r2:", round(test_r2,2),
              "; test RMSE:", round(test_rmse,2), sep=""))
#Restore the previous par setting
par(curpar)
# }