data(Maze)
# solve the MDP with no discounting
sol <- solve_MDP(Maze, discount = 1)
sol
policy(sol)
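# A hedged sketch: policy() appears to return a list with one data.frame per
# epoch; for this undiscounted solution we assume a single element and inspect
# its first rows (state, utility, and prescribed action).
head(policy(sol)[[1]])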
## Example 1: simulate 10 trajectories of length 10; only the final state of each trajectory is returned
sim <- simulate_MDP(sol, n = 10, horizon = 10, verbose = TRUE)
head(sim)
# additional data is available as attributes
names(attributes(sim))
attr(sim, "avg_reward")
colMeans(attr(sim, "action"))
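# A hedged sketch (assumption): sim is taken to be a vector of final-state ids,
# as the Example 1 comment suggests; tabulate their empirical distribution over
# the 10 trajectories.
prop.table(table(sim))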
## Example 2: simulate 100 trajectories, always starting in state s_1
sim <- simulate_MDP(sol, n = 100, start = "s_1", horizon = 10)
sim
# the average reward is an estimate of the utility of the start state under the optimal policy:
policy(sol)[[1]][1,]
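# A hedged sketch (assumption): with more trajectories and a longer horizon the
# Monte Carlo estimate in "avg_reward" should move closer to the utility
# reported by policy(); n and horizon are illustrative values only.
sim_long <- simulate_MDP(sol, n = 1000, start = "s_1", horizon = 100)
attr(sim_long, "avg_reward")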