# NOT RUN {
## Michael's Sleepy Tiger Problem is an MDP with perfect observability:
## the tiger sleeps in front of its door, so the state is directly observable.
Tiger_MDP <- MDP(
  name = "Michael's Sleepy Tiger Problem",
  discount = 1,
  states = c("tiger-left", "tiger-right"),
  actions = c("open-left", "open-right", "do-nothing"),
  start = "tiger-left",
  transition_prob = list(
    "open-left"  = "uniform",
    "open-right" = "uniform",
    "do-nothing" = "identity"),
  # the reward helper R_() expects: action, start.state, end.state, observation, value
  reward = rbind(
    R_("open-left",  "tiger-left",  v = -100),
    R_("open-left",  "tiger-right", v =   10),
    R_("open-right", "tiger-left",  v =   10),
    R_("open-right", "tiger-right", v = -100),
    R_("do-nothing",                v =    0)
  )
)
Tiger_MDP
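
# Optional sanity check: inspect the matrices induced by the specification
# above. transition_matrix() and reward_matrix() are accessors assumed to be
# available in the pomdp package; their output format may vary by version.
transition_matrix(Tiger_MDP)
reward_matrix(Tiger_MDP)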
# solve over a finite horizon of 5 epochs; discount = 1 means no discounting.
# method = "enum" selects the exact enumeration algorithm.
s <- solve_POMDP(Tiger_MDP, method = "enum", horizon = 5)
s
# value function and policy
plot_value_function(s)
policy(s)
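
# Optional: evaluate the policy. reward() (expected total reward at the start
# belief) and simulate_POMDP() are assumed from the pomdp package; treat this
# as a sketch, since the simulation's return structure differs across versions.
reward(s)
simulate_POMDP(s, n = 100, horizon = 5)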
# }