set.seed(123)
train_idx <- sample(1:nrow(admix$X), 100)
# Note: ^ shuffling is important here! Keeps test and train groups comparable.
train <- list(X = admix$X[train_idx,], y = admix$y[train_idx])
train_design <- create_design(X = train$X, y = train$y)
test <- list(X = admix$X[-train_idx,], y = admix$y[-train_idx])
fit <- plmm(design = train_design)
# make predictions for all lambda values
pred1 <- predict(object = fit, newX = test$X, type = "lp")
pred2 <- predict(object = fit, newX = test$X, type = "blup", X = train$X)
# look at mean squared prediction error
mspe <- apply(pred1, 2, function(c){crossprod(test$y - c)/length(c)})
min(mspe)
mspe_blup <- apply(pred2, 2, function(c){crossprod(test$y - c)/length(c)})
min(mspe_blup) # BLUP is better
# compare the MSPE of our model to a null model, for reference
# null model = intercept only -> y_hat is always mean(y)
crossprod(mean(test$y) - test$y)/length(test$y)
Run the code above in your browser using DataLab