library("polle")
### Two stages:
d <- sim_two_stage(5e2, seed=1)
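# a quick look at the simulated data (head() is base R):
head(d)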
pd <- policy_data(d,
                  action = c("A_1", "A_2"),
                  baseline = c("BB"),
                  covariates = list(L = c("L_1", "L_2"),
                                    C = c("C_1", "C_2")),
                  utility = c("U_1", "U_2", "U_3"))
pd
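# (sketch) inspecting the policy data; get_action_set() and get_utility()
# are assumed polle accessors for the action set and the per-id utility:
get_action_set(pd)
head(get_utility(pd))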
### V-restricted (Doubly Robust) Q-learning
# specifying the learner:
pl <- policy_learn(
  type = "drql",
  control = control_drql(qv_models = list(q_glm(formula = ~ C_1 + BB),
                                          q_glm(formula = ~ L_1 + BB))),
  full_history = TRUE
)
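# (sketch) the learner can presumably also be applied directly to the
# policy data; calling pl() with nuisance q/g-models is an assumption
# based on the policy_learn interface:
po_direct <- pl(policy_data = pd, q_models = q_glm(), g_models = g_glm())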
# evaluating the learned policy
pe <- policy_eval(policy_data = pd,
                  policy_learn = pl,
                  q_models = q_glm(),
                  g_models = g_glm())
pe
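# (sketch) coef() and vcov() methods for the policy_eval object are
# assumed here, giving the policy value estimate and its variance:
coef(pe)
sqrt(vcov(pe))  # standard error of the estimate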
# getting the policy object:
po <- get_policy_object(pe)
# inspecting the fitted QV-model for each action strata at stage 1:
po$qv_functions$stage_1
# applying the learned policy to the policy data (first rows):
head(get_policy(pe)(pd))
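# (sketch) a stage-specific policy function can also be extracted;
# applying get_policy_functions() to the policy object with a stage
# index is assumed here:
pf1 <- get_policy_functions(po, stage = 1)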