# NOT RUN {
# The problem can be loaded using data(Maze).
# Here is the complete problem definition:
S <- paste0("s_", seq_len(3 * 4))
s2rc <- function(s) {
if(is.character(s)) s <- match(s, S)
c((s - 1) %% 3 + 1, (s - 1) %/% 3 + 1)
}
rc2s <- function(rc) S[rc[1] + 3 * (rc[2] - 1)]
A <- c("north", "south", "east", "west")
T <- function(action, start.state, end.state) {
action <- match.arg(action, choices = A)
if (start.state %in% c('s_11', 's_12', 's_5')) {
if (start.state == end.state) return(1)
else return(0)
}
if(action %in% c("north", "south")) error_direction <- c("east", "west")
else error_direction <- c("north", "south")
rc <- s2rc(start.state)
delta <- list(north = c(+1, 0), south = c(-1, 0),
east = c(0, +1), west = c(0, -1))
P <- matrix(0, nrow = 3, ncol = 4)
add_prob <- function(P, rc, a, value) {
new_rc <- rc + delta[[a]]
if (new_rc[1] > 3 || new_rc[1] < 1 || new_rc[2] > 4 || new_rc[2] < 1
|| (new_rc[1] == 2 && new_rc[2]== 2))
new_rc <- rc
P[new_rc[1], new_rc[2]] <- P[new_rc[1], new_rc[2]] + value
P
}
P <- add_prob(P, rc, action, .8)
P <- add_prob(P, rc, error_direction[1], .1)
P <- add_prob(P, rc, error_direction[2], .1)
P[rbind(s2rc(end.state))]
}
T("n", "s_1", "s_2")
R <- rbind(
R_(end.state = '*', value = -0.04),
R_(end.state = 's_11', value = -1),
R_(end.state = 's_12', value = +1),
R_(start.state = 's_11', value = 0),
R_(start.state = 's_12', value = 0),
R_(start.state = 's_5', value = 0)
)
Maze <- MDP(
name = "Stuart Russell's 3x4 Maze",
discount = 1,
horizon = Inf,
states = S,
actions = A,
transition_prob = T,
reward = R
)
Maze
str(Maze)
maze_solved <- solve_MDP(Maze, method = "value")
policy(maze_solved)
# show the utilities and optimal actions organized in the maze layout (like in the AIMA textbook)
matrix(policy(maze_solved)[[1]]$U, nrow = 3, dimnames = list(1:3, 1:4))[3:1, ]
matrix(policy(maze_solved)[[1]]$action, nrow = 3, dimnames = list(1:3, 1:4))[3:1, ]
# Note: the optimal actions for the states with a utility of 0 are artefacts and should be ignored.
# }
Run the code above in your browser using DataLab