library(pls)
# Example: compare Principal Variables Regression (PVR) against PCR on the
# gasoline NIR data shipped with the pls package.
# NOTE(review): PVR() is not attached by anything visible in this snippet;
# confirm that the package providing it is loaded before running.
data(gasoline, package = "pls")

# PVR: Select 10 variables using all PCs in voting
pvr_result <- PVR(gasoline$NIR, gasoline$octane, nvar = 10)

# Compare with PCR using all variables
pcr_result <- pcr(
  octane ~ NIR,
  ncomp = 10,
  data = gasoline,
  validation = "CV",
  scale = FALSE
)

# Compare X-variance and Y-variance explained ----
# par() returns the previous values of the settings it changes; keep them so
# the caller's layout is restored afterwards instead of being forced to 1 x 1.
old_par <- par(mfrow = c(1, 2))

# Left panel: cumulative share of X-variance
plot(
  cumsum(pvr_result$ssEX),
  type = "b", col = "blue",
  xlab = "Number of Variables/Components",
  ylab = "Cumulative % X-Variance",
  main = "X-Variance: PVR vs PCR",
  ylim = c(50, 100)
)
pcr_xvar <- 100 * cumsum(pcr_result$Xvar) / pcr_result$Xtotvar
lines(seq_along(pcr_xvar), pcr_xvar, type = "b", col = "red")
legend(
  "bottomright",
  legend = c("PVR (10 vars)", "PCR (10 comps)"),
  col = c("blue", "red"), lty = 1, pch = 1
)

# Right panel: cumulative share of Y-variance
plot(
  cumsum(pvr_result$ssEY),
  type = "b", col = "blue",
  xlab = "Number of Variables/Components",
  ylab = "Cumulative % Y-Variance",
  main = "Y-Variance: PVR vs PCR",
  ylim = c(0, 100)
)
# pcr_result was fitted with validation = "CV", so R2() would default to the
# cross-validated estimate. PVR() above is fitted on the full data without any
# validation argument, so request the training estimate to compare like with
# like (this also matches the training-based X-variance and RMSE below).
# The [1, 1, -1] index drops the leading 0-component entry.
pcr_yvar <- 100 * R2(pcr_result, estimate = "train")$val[1, 1, -1]
lines(seq_along(pcr_yvar), pcr_yvar, type = "b", col = "red")
legend(
  "bottomright",
  legend = c("PVR (10 vars)", "PCR (10 comps)"),
  col = c("blue", "red"), lty = 1, pch = 1
)

par(old_par)

# Predict using selected variables ----
X_selected <- gasoline$NIR[, pvr_result$ids]
# The last column of betas is used together with a leading column of ones;
# presumably that column is the full nvar-variable model and its first row is
# the intercept -- confirm against the PVR documentation.
y_pred_pvr <- cbind(1, X_selected) %*%
  pvr_result$betas[, ncol(pvr_result$betas)]
y_pred_pcr <- predict(pcr_result, ncomp = 10, newdata = gasoline)

# Compare RMSE (training error - same data used for fitting)
rmse_pvr <- sqrt(mean((gasoline$octane - y_pred_pvr)^2))
rmse_pcr <- sqrt(mean((gasoline$octane - y_pred_pcr)^2))
cat("RMSE - PVR:", round(rmse_pvr, 4), "\n")
cat("RMSE - PCR:", round(rmse_pcr, 4), "\n")
# Run the code above in your browser using DataLab