data(Tiger)
# solve the POMDP for 5 epochs and no discounting
sol <- solve_POMDP(Tiger, horizon = 5, discount = 1, method = "enum")
sol
policy(sol)
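# Optional sketch: the raw solver output (e.g., alpha vectors and the policy)
# is stored in the solution element of the returned object; a quick look at
# its structure (assuming the standard pomdp solution format):
str(sol$solution, max.level = 1)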
## Example 1: simulate 100 trajectories; only the final belief state of each is returned
sim <- simulate_POMDP(sol, n = 100, verbose = TRUE)
head(sim)
# plot the final belief states; look at the average reward and how often each action was used
plot_belief_space(sol, sample = sim)
# additional data is available as attributes
names(attributes(sim))
attr(sim, "avg_reward")
colMeans(attr(sim, "action"))
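# As a quick additional check, the average final belief over all simulated
# trajectories can be computed directly from the returned matrix (base R only,
# no further pomdp functions assumed):
colMeans(sim)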
## Example 2: look at all belief states visited along the trajectories, starting from a given initial belief
sim <- simulate_POMDP(sol, n = 100, belief = c(.5, .5), visited_beliefs = TRUE)
# plot with added density
plot_belief_space(sol, sample = sim, ylim = c(0,3))
lines(density(sim[, 1], bw = .05)); axis(2); title(ylab = "Density")
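# A rough measure of belief-space coverage: count the distinct (rounded)
# belief points among all visited beliefs (base R only, purely illustrative):
nrow(unique(round(sim, 3)))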
## Example 3: simulate trajectories for an unsolved POMDP using randomized actions
sim <- simulate_POMDP(Tiger, n = 100, horizon = 5,
  random_actions = TRUE, visited_beliefs = TRUE)
plot_belief_space(sol, sample = sim, ylim = c(0,6))
lines(density(sim[, 1], bw = .05)); axis(2); title(ylab = "Density")
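# For comparison with the solved policy in Example 1, the average reward under
# randomized actions can be estimated the same way (final beliefs only, reusing
# only calls shown above; this assumes the avg_reward attribute is also attached
# for randomized-action simulations). It is typically much lower:
sim_rand <- simulate_POMDP(Tiger, n = 100, horizon = 5, random_actions = TRUE)
attr(sim_rand, "avg_reward")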