# NOT RUN {
## Define the Tiger problem explicitly (it is also available via data(Tiger); see ?Tiger)
# Tiger problem specified with explicit per-action tables.
# Two states, three actions, two observations; the start belief is "uniform".
Tiger <- POMDP(
  name = "Tiger Problem",
  discount = 0.75,
  states = c("tiger-left", "tiger-right"),
  actions = c("listen", "open-left", "open-right"),
  observations = c("tiger-left", "tiger-right"),
  start = "uniform",

  # Transition model, one entry per action.
  transition_prob = list(
    "listen" = "identity",
    "open-left" = "uniform",
    "open-right" = "uniform"
  ),

  # Observation model, one entry per action. For "listen" the 2 x 2 matrix
  # is given row by row: 0.85 on the diagonal, 0.15 off the diagonal.
  observation_prob = list(
    "listen" = matrix(
      c(0.85, 0.15,
        0.15, 0.85),
      nrow = 2,
      byrow = TRUE
    ),
    "open-left" = "uniform",
    "open-right" = "uniform"
  ),

  # R_() takes, in order: action, start.state, end.state, observation, value.
  # Arguments that are left out default to '*', which matches any value.
  reward = rbind(
    R_("listen", value = -1),
    R_("open-left", "tiger-left", value = -100),
    R_("open-left", "tiger-right", value = 10),
    R_("open-right", "tiger-left", value = 10),
    R_("open-right", "tiger-right", value = -100)
  )
)

# Show the resulting model object.
Tiger
# Defining the Tiger problem using functions
# Transition probability for a single (action, start.state, end.state) triple.
#
# action      name of the action taken
# start.state name of the state before the action
# end.state   name of the state after the action
#
# Returns 1/2 for every action other than "listen" (uniform distribution).
# For "listen" it returns 1 when the state is unchanged and 0 otherwise.
trans_f <- function(action, start.state, end.state) {
  # Guard clause: all non-listen actions have a uniform distribution.
  if (action != "listen") {
    return(1 / 2)
  }

  # Listening: only the "same state" transition has probability mass.
  if (end.state == start.state) {
    1
  } else {
    0
  }
}
# Observation probability for a single (action, end.state, observation) triple.
#
# action      name of the action taken
# end.state   name of the state reached after the action
# observation name of the observation received
#
# Returns 1/2 for every action other than "listen". For "listen" it returns
# 0.85 when the observation equals the end state and 0.15 otherwise.
obs_f <- function(action, end.state, observation) {
  # Guard clause: non-listen actions return 1/2 regardless of the other
  # arguments.
  if (action != "listen") {
    return(1 / 2)
  }

  # Listening: 0.85 when the observation matches the end state.
  if (end.state == observation) {
    0.85
  } else {
    0.15
  }
}
# Reward for a single (action, start.state, end.state, observation) tuple.
#
# action      name of the action taken
# start.state name of the state before the action
# end.state   unused; part of the expected signature
# observation unused; part of the expected signature
#
# Returns -1 for "listen". For "open-left" / "open-right" it returns -100
# when the opened side equals the tiger's side in start.state and 10 when
# it is the other side. Any other combination raises an error.
rew_f <- function(action, start.state, end.state, observation) {
  # Listening has a fixed reward and does not look at the state.
  if (action == "listen") {
    return(-1)
  }

  # Dispatch on the door first, then on where the tiger is.
  if (action == "open-left") {
    if (start.state == "tiger-left") {
      return(-100)
    }
    if (start.state == "tiger-right") {
      return(10)
    }
  } else if (action == "open-right") {
    if (start.state == "tiger-left") {
      return(10)
    }
    if (start.state == "tiger-right") {
      return(-100)
    }
  }

  # Unknown action or unknown start state.
  stop("Not possible")
}
# Same Tiger problem, but the transition, observation and reward models are
# supplied as the functions trans_f, obs_f and rew_f instead of tables.
Tiger_func <- POMDP(
  name = "Tiger Problem",
  discount = 0.75,
  states = c("tiger-left", "tiger-right"),
  actions = c("listen", "open-left", "open-right"),
  observations = c("tiger-left", "tiger-right"),
  start = "uniform",

  # Function-based model components.
  transition_prob = trans_f,
  observation_prob = obs_f,
  reward = rew_f
)

# Show the resulting model object.
Tiger_func
# }
# Run the code above in your browser using DataLab