# Example: run cf() with a GLM learner on simulated semi-supervised data.
set.seed(123)

# Sample size and an N x 5 matrix of standard-normal covariates
N <- 200
X <- matrix(rnorm(N * 5), nrow = N, ncol = 5)

# Binary treatment drawn from a logistic model in covariates 1 and 2.
# It is not passed to cf() below, but the draw is kept so that the random
# stream (and hence every later simulated value) stays the same.
A <- rbinom(N, 1, plogis(X[, 1] - 0.5 * X[, 2]))

# Fully observed binary outcome from a logistic model in covariates 1 and 3
Y_full <- rbinom(N, 1, plogis(0.5 * X[, 1] - 0.25 * X[, 3]))

# Blank out a random quarter of the outcomes to mimic semi-supervised data
unlabelled_idx <- sample(seq_len(N), size = N / 4)
Y <- replace(Y_full, unlabelled_idx, NA)

# Labelling indicator: 1 where the outcome is observed, 0 where it is missing
R <- ifelse(is.na(Y), 0, 1)

# Randomly assign every observation to one of the cross-validation folds
n_folds <- 5
foldid <- sample(rep(seq_len(n_folds), length.out = N))

# Run cf with the glm model
result <- cf(
  Y = Y,
  X = X,
  nfold = n_folds,
  R = R,
  foldid = foldid,
  cf_model = "glm"
)

# Inspect the returned components
print(result$log_losses)
print(result$best_rounds_index)
# Run the code above in your browser using DataLab