################################################################
# Example 1: Solving the simple infinite-horizon Tiger problem
data("Tiger")
Tiger
sol <- solve_POMDP(model = Tiger)
sol
# look at the model
sol$model
# look at solver output
sol$solver_output
# look at the solution
sol$solution
# policy: the value function (alpha vectors), the optimal action, and the observation-dependent transitions
policy(sol)
# plot the policy graph of the infinite-horizon POMDP
plot_policy_graph(sol)
# value function
plot_value_function(sol, ylim = c(0,20))
# display available solver options which can be passed on to the solver as parameters.
solve_POMDP_parameter()
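# As a quick check of the solution, evaluate the expected long-term
# reward for a belief using reward() (also used in the finite-horizon
# examples below); the uniform belief c(0.5, 0.5) is chosen here purely
# for illustration.
reward(sol)                       # reward for the model's start belief
reward(sol, belief = c(0.5, 0.5)) # reward for an explicit uniform belief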
################################################################
# Example 2: Solve a problem specified as a POMDP file
# using a grid of size 10
sol <- solve_POMDP("http://www.pomdp.org/examples/cheese.95.POMDP",
method = "grid", parameter = list(fg_points = 10))
sol
policy(sol)
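# The same model can also be read from a local file first (a sketch;
# assumes the file has been downloaded and that read_POMDP() is
# available in the installed package version):
# cheese <- read_POMDP("cheese.95.POMDP")
# sol <- solve_POMDP(cheese, method = "grid", parameter = list(fg_points = 10))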
################################################################
# Example 3: Solving a finite-horizon POMDP using the incremental
# pruning method (without discounting)
sol <- solve_POMDP(model = Tiger,
  horizon = 3, discount = 1, method = "incprune")
sol
# look at the policy tree
policy(sol)
# note: with a horizon of only 3 epochs, it does not make sense to open a door in epoch 1 or 2.
reward(sol) # listen twice and then open the door or listen 3 times
reward(sol, belief = c(1,0)) # listen twice (-2) and then open the right door (10)
reward(sol, belief = c(1,0), epoch = 3) # just open the right door (10)
reward(sol, belief = c(.95,.05), epoch = 3) # just open the right door (95% chance)
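# A sketch of checking these values by simulation; simulate_POMDP() and
# its arguments are assumptions about the installed package version:
# sim <- simulate_POMDP(sol, n = 100, belief = c(1, 0), horizon = 3)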
################################################################
# Example 4: Using terminal values
#
# Specify a terminal value of 1000 for the case that the tiger is
# behind the right door after the horizon of 3 epochs
sol <- solve_POMDP(model = Tiger,
  horizon = 3, discount = 1, method = "incprune",
  terminal_values = c(0, 1000))
sol
policy(sol)
# Note: the optimal strategy is to never open the left door. If we think
# the tiger is behind the right door, we wait to collect the terminal
# value of 1000. If we think the tiger is behind the left door, we open
# the right door (+10) and get a 50/50 chance that the tiger will move
# behind the right door.
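# Terminal values are specified as one value per state (here 0 for
# tiger-left and 1000 for tiger-right). A sanity check, assuming the
# state names are stored in sol$model$states:
# stopifnot(length(c(0, 1000)) == length(sol$model$states))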
################################################################
# Example 5: Model time-dependent transition probabilities
# The tiger behaves normally for the first 3 epochs (i.e., it goes
# randomly to one of the two doors after a door was opened). After 3
# epochs it gets scared: when a door is opened, it always runs to the
# other door.
# Specify the horizon for each of the two different episodes.
Tiger_time_dependent <- Tiger
Tiger_time_dependent$model$name <- "Scared Tiger Problem"
Tiger_time_dependent$model$horizon <- c(normal_tiger = 3, scared_tiger = 3)
Tiger_time_dependent$model$transition_prob <- list(
  normal_tiger = list(
    "listen" = "identity",
    "open-left" = "uniform",
    "open-right" = "uniform"),
  scared_tiger = list(
    "listen" = "identity",
    "open-left" = rbind(c(0, 1), c(0, 1)),
    "open-right" = rbind(c(1, 0), c(1, 0))
  )
)
Tiger_time_dependent
sol <- solve_POMDP(model = Tiger_time_dependent, discount = 1, method = "incprune")
sol
policy(sol)
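# The solved process runs for the sum of the episode horizons
# (3 normal + 3 scared = 6 epochs):
sum(Tiger_time_dependent$model$horizon)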
################################################################
# Example 6: Alternative method to solve time-dependent POMDPs
# 1) create the scared tiger model
Tiger_scared <- Tiger
Tiger_scared$model$transition_prob <- list(
"listen" = "identity",
"open-left" = rbind(c(0, 1), c(0, 1)),
"open-right" = rbind(c(1, 0), c(1, 0))
)
# 2) Solve the episodes in reverse order: the scared tiger without terminal values first.
sol_scared <- solve_POMDP(model = Tiger_scared,
  horizon = 3, discount = 1, method = "incprune")
sol_scared
policy(sol_scared)
# 3) Solve the regular tiger with the value function of the scared tiger as terminal values
sol <- solve_POMDP(model = Tiger,
  horizon = 3, discount = 1, method = "incprune",
  terminal_values = sol_scared$solution$alpha[[1]])
sol
policy(sol)
# note: it is optimal to mostly listen until the tiger becomes scared.
# Opening a door in the first epoch is only optimal if we are extremely
# sure where the tiger is.
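# Inspect the alpha vectors that were passed on as terminal values
# (one row per alpha vector, one column per state):
sol_scared$solution$alpha[[1]]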
################################################################
# Example 7: PBVI with a custom grid
# Create a search grid by sampling from the belief space in
# 10 regular intervals
custom_grid <- sample_belief_space(Tiger, n = 10, method = "regular")
custom_grid
# Visualize the search grid
plot_belief_space(sol, sample = custom_grid)
# Solve the POMDP using the grid for approximation
sol <- solve_POMDP(Tiger, method = "grid", parameter = list(grid = custom_grid))
sol
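# Compare the approximation with the policy found earlier; a coarser
# grid generally trades accuracy for speed:
policy(sol)
reward(sol)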