predict.swaglm: predict.swaglm

Description

Predict method for a swaglm object

Usage

# S3 method for swaglm
predict(object, newdata, ...)

Value

A list with two elements:

mat_eta_prediction: A numeric matrix containing the linear predictors (\(\eta = X \hat{\beta}\)) for each observation in newdata for all selected models. Each row corresponds to an observation in newdata, and each column corresponds to one of the selected models.
mat_response_prediction: A numeric matrix containing the predicted responses, obtained by applying the inverse link function to the elements in mat_eta_prediction. The dimensions are identical to mat_eta_prediction, so that for each observation (row) the predicted responses for all selected models are aligned per column.

Arguments

object: An object of class swaglm returned by swaglm.
newdata: A data.frame or matrix containing the same predictors (columns) as used in the original training dataset.
...: Further arguments passed to or from other methods.

Details

Computes predictions from a fitted swaglm object on new data. The function returns the linear predictors (eta) for each selected model and the corresponding predicted responses using the model's inverse link function.

The function performs the following steps:

Checks that newdata has the same number of columns as the design matrix used in the fitted swaglm object.
Computes the linear predictors (\(\eta = X \hat{\beta}\)) for each selected model in the swaglm object.
Applies the inverse of the model's link function to compute predicted responses.

Examples

Run this code

set.seed(12345)
n <- 2000
p <- 100
# create design matrix and vector of coefficients
Sigma <- diag(rep(1/p, p))
X <- MASS::mvrnorm(n = n, mu = rep(0, p), Sigma = Sigma)
beta = c(10, rep(0,p-1))
sigma2=2
y <- 1 + X%*%beta + rnorm(n, mean = 0, sd = sqrt(sigma2))

# subset data
n_data_train = n-5
X_sub = X[1:n_data_train, ]
y_sub = y[1:n_data_train]

# plot train data
plot(X_sub[,1], y_sub)
abline(a=1, b=beta[1])

# run swag
swaglm_obj = swaglm(X = X_sub, y = y_sub, p_max = 15, family = gaussian(),
                    method = 0, alpha = .15, verbose = TRUE)

# compute prediction
X_to_predict = X[(n_data_train+1):(dim(X)[1]), ]
y_pred = predict(swaglm_obj, newdata = X_to_predict)
n_selected_model = dim(y_pred$mat_eta_prediction)[2]

# in that case mat_eta_prediction and mat_reponse_prediction are the same
all.equal(y_pred$mat_eta_prediction, y_pred$mat_reponse_prediction)

# compute average prediction (accross selected models)
y_pred_mean = apply(y_pred$mat_reponse_prediction, MARGIN = 1, FUN = mean)

# plot
y_to_predict = y[(n_data_train+1):(dim(X)[1])]
plot(X_to_predict[,1], y_to_predict, col="red", ylim=c(-4,4), ylab="y", xlab="X")
# add all prediction
for(i in seq(dim(X_to_predict)[1])){
  # i=1
  points(x = rep(X_to_predict[i,1],n_selected_model ),
         y = y_pred$mat_reponse_prediction[i,])
}
points(x =X_to_predict[,1], y = y_pred_mean, col="orange")
abline(a=1, b=beta[1])
legend(
  "topleft",
  legend = c(
    "True values (test set)",
    "Predictions (all selected models)",
    "Average prediction",
    "True regression line"
  ),
  col = c("red", "black", "orange", "black"),
  pch = c(1, 1, 1, NA),
  lty = c(NA, NA, NA, 1),
  cex = 0.8,
  bty="n"
)



# ----------------------------------- logistic regression

set.seed(12345)
n <- 2000
p <- 100
# create design matrix and vector of coefficients
Sigma <- diag(rep(1/p, p))
X <- MASS::mvrnorm(n = n, mu = rep(0, p), Sigma = Sigma)
beta = c(-15,-10,5,10,15, rep(0,p-5))

# --------------------- generate from logistic regression with an intercept of one
z <- 1 + X%*%beta
pr <- 1/(1 + exp(-z))
y <- as.factor(rbinom(n, 1, pr))
y = as.numeric(y)-1

# subset data
n_data_train = n-200
X_sub = X[1:n_data_train, ]
y_sub = y[1:n_data_train]

swaglm_obj = swaglm::swaglm(X=X_sub, y = y_sub, p_max = 20, family = stats::binomial(),
                            alpha = .15, verbose = TRUE, seed = 123)

# compute prediction
X_to_predict = X[(n_data_train+1):(dim(X)[1]), ]
y_pred = predict(swaglm_obj, newdata = X_to_predict)
y_pred_majority_class <- ifelse(rowMeans(y_pred$mat_reponse_prediction) >= 0.5, 1, 0)
y_to_predict = y[(n_data_train+1):(dim(X)[1])]

# tabulate
table(True = y_to_predict, Predicted = y_pred_majority_class)

Run the code above in your browser using DataLab