### Two stages ----

# Simulate a two-stage data set; the seed is fixed so the example is
# reproducible.
d <- sim_two_stage(5e2, seed = 1)

# Build the policy data object from the simulated data, declaring which
# columns hold the actions, the covariates (grouped under the names L and C),
# and the utilities.
# NOTE(review): the "_1"/"_2" suffixes presumably index the stage -- confirm.
pd <- policy_data(
  d,
  action = c("A_1", "A_2"),
  covariates = list(
    L = c("L_1", "L_2"),
    C = c("C_1", "C_2")
  ),
  utility = c("U_1", "U_2", "U_3")
)

# A user-defined dynamic policy: 1 when L is positive, 0 otherwise.
# NOTE(review): reuse = TRUE presumably applies the same rule at every
# stage -- confirm against the policy_def() documentation.
p <- policy_def(
  function(L) (L > 0) * 1,
  reuse = TRUE
)
p

# First five rows of the actions this policy assigns on the policy data.
head(p(pd), 5)

# V-restricted (doubly robust) Q-learning ----

# Set up the learner; the QV-models are GLMs using only the covariate C.
pl <- policy_learn(
  type = "drql",
  control = control_drql(qv_models = q_glm(formula = ~ C))
)

# Fit the policy object, supplying GLM-based Q-models and g-models.
po <- pl(
  policy_data = pd,
  q_models = q_glm(),
  g_models = g_glm()
)

# Extract the fitted policy (this overwrites the hand-written `p` above),
# print it, and show the first rows of the actions it assigns.
p <- get_policy(po)
p
head(p(pd))
# Run the code above in your browser using DataLab