# load the pomdp package (provides solve_MDP() and the Maze gridworld data)
library("pomdp")

data(Maze)
Maze
# use value iteration
maze_solved <- solve_MDP(Maze, method = "value")
policy(maze_solved)
# value function (utility function U)
plot_value_function(maze_solved)
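
# As an alternative to plot_value_function(), the utilities can also be shown
# with a base-R barplot. This is only a sketch and assumes the policy is a
# data.frame with columns 'state' and 'U' (an assumption about the returned
# structure, not shown in the original example).
pol <- policy(maze_solved)[[1]]
barplot(pol$U, names.arg = as.character(pol$state), las = 2, ylab = "U(s)")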
# Q-function (a states x actions matrix of action values)
q_values_MDP(maze_solved)
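
# The greedy policy can also be read directly off the Q-matrix with base R.
# This sketch assumes q_values_MDP() returns a numeric matrix with one row
# per state and one column per action (an assumption about the return value).
q <- q_values_MDP(maze_solved)
data.frame(state = rownames(q), action = colnames(q)[apply(q, 1, which.max)])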
# use modified policy iteration
maze_solved <- solve_MDP(Maze, method = "policy")
policy(maze_solved)
# solve the problem with a finite horizon of 3 decision epochs
maze_solved <- solve_MDP(Maze, method = "value", horizon = 3)
policy(maze_solved)
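
# For the finite-horizon solution, policy() should contain one policy per
# epoch (it is indexed with [[1]] further below); assuming that list
# structure, the policy for the first epoch alone is
policy(maze_solved)[[1]]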
# create a random policy where action n (north) is very likely and approximate
# its value function. For the approximation, we use a copy of the Maze with the
# discount factor set to .9.
Maze_discounted <- Maze
Maze_discounted$discount <- .9
pi <- random_MDP_policy(Maze_discounted, prob = c(n = .7, e = .1, s = .1, w = 0.1))
pi
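
# Quick sanity check of the sampled policy: tabulate how often each action
# was chosen. This assumes the policy is a data.frame with an 'action'
# column (an assumption about the structure returned by random_MDP_policy()).
table(pi$action)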
# compare the approximate utility function for the random policy with the
# utility function for the optimal policy found by the solver.
maze_solved <- solve_MDP(Maze)
approx_MDP_policy_evaluation(pi, Maze_discounted, k_backup = 100)
approx_MDP_policy_evaluation(policy(maze_solved)[[1]], Maze_discounted, k_backup = 100)
# Note that the solver already calculates the utility function and returns it with the policy
policy(maze_solved)
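
# A side-by-side comparison of the approximate evaluations and the solver's
# utilities. This sketch assumes that approx_MDP_policy_evaluation() returns
# a numeric vector of per-state utilities and that the policy data.frame has
# a column U (assumptions about the return structures).
u_random  <- approx_MDP_policy_evaluation(pi, Maze_discounted, k_backup = 100)
u_optimal <- approx_MDP_policy_evaluation(policy(maze_solved)[[1]], Maze_discounted, k_backup = 100)
cbind(random = u_random, optimal = u_optimal, solver = policy(maze_solved)[[1]]$U)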