# Simulate a data set with generate_clustered_mar(); the intercept (alpha0),
# slopes (alpha), target_missing and seed are passed through unchanged.
dat <- generate_clustered_mar(
  n = 80,
  m = 4,
  d = 2,
  alpha0 = -0.4,
  alpha = c(-1.0, 0.8),
  target_missing = 0.30,
  seed = 1
)

# Covariate columns handed to the propensity fit and to its predictor.
x_cols <- c("X1", "X2")

## Logistic regression
fit_log <- fit_missingness_propensity(
  dat, "delta", x_cols,
  method = "logistic"
)

# drop = FALSE keeps the covariate subset two-dimensional.
p_log <- fit_log$predict(dat[, x_cols, drop = FALSE])

# Show the first few fitted values.
head(p_log)
# \donttest{
## Compare with other methods

## True propensity under the generator
# NOTE(review): `s` is read from the "alpha_shift" attribute but is never used
# below. If the generator adds this shift to the intercept, `eta` presumably
# needs it as well -- confirm against generate_clustered_mar().
s <- attr(dat, "alpha_shift")
eta <- (-0.4) + (-1.0) * dat$X1 + 0.8 * dat$X2

# Clip the linear predictor to [-30, 30] before the inverse-logit transform.
eta_clipped <- pmin(pmax(eta, -30), 30)
pi_true <- 1 / (1 + exp(-eta_clipped))

# Fit the two additional methods; the fits run in the same order as before,
# each restricted to a single thread.
fit_grf <- fit_missingness_propensity(
  dat, "delta", x_cols,
  method = "grf",
  num.trees = 800,
  num.threads = 1
)

xgb_params <- list(
  max_depth = 3,
  eta = 0.05,
  subsample = 0.8,
  colsample_bytree = 0.8
)
fit_xgb <- fit_missingness_propensity(
  dat, "delta", x_cols,
  method = "boosting",
  nrounds = 300,
  params = xgb_params,
  nthread = 1
)

# Predict on the same covariate subset for both fits.
x_new <- dat[, x_cols, drop = FALSE]
p_grf <- fit_grf$predict(x_new)
p_xgb <- fit_xgb$predict(x_new)

# One panel per method: estimated vs. true propensity with the identity line.
estimates <- list(Logistic = p_log, GRF = p_grf, Boosting = p_xgb)
op <- par(mfrow = c(1, 3))
for (label in names(estimates)) {
  plot(
    pi_true, estimates[[label]],
    pch = 16, cex = 0.5,
    xlab = "True pi(x)", ylab = "Estimated pi-hat(x)", main = label
  )
  abline(0, 1, lwd = 2)
}
# Restore the graphics parameters that were in effect before the panel layout.
par(op)
# }
# Run the code above in your browser using DataLab