Learn R Programming

MDP2 (version 2.1.2)

loadMDP: Load the HMDP model defined in the binary files. The model are created in memory using the external C++ library.

Description

Load the HMDP model defined in the binary files. The model are created in memory using the external C++ library.

Usage

loadMDP(
  prefix = "",
  binNames = c("stateIdx.bin", "stateIdxLbl.bin", "actionIdx.bin", "actionIdxLbl.bin",
    "actionWeight.bin", "actionWeightLbl.bin", "transProb.bin", "externalProcesses.bin"),
  eps = 1e-05,
  check = TRUE,
  verbose = FALSE,
  getLog = TRUE
)

Value

A list containing relevant information about the model such as model file names (binNames), time horizon (timeHorizon), number of states (states), number of states at last stage of the founder process (founderStatesLast), number of actions (actions), number of levels (levels), names of the weights associated to each action (weightNames) and a pointer ptr to the model object in memory. Note for models with an infinite time-horizon the states at the founder level is repeated at stage two so have something aka a double array representation.

Arguments

prefix

A character string with the prefix added to binNames. Used to identify a specific model.

binNames

A character vector of length 7 giving the names of the binary files storing the model.

eps

The sum of the transition probabilities must at most differ eps from one.

check

Check if the MDP seems correct.

verbose

More output when running algorithms.

getLog

Output the log messages.

Examples

Run this code
## Set working dir
wd <- setwd(tempdir())

# Create the small machine repleacement problem used as an example in L.R. Nielsen and A.R.
# Kristensen. Finding the K best policies in a finite-horizon Markov decision process. European
# Journal of Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011.

## Create the MDP using a dummy replacement node
prefix<-"machine1_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
      w$state(label="replaced")       # v=(3,3)
         w$action(label="Dummy", weights=0, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good", end=TRUE)        # v=(4,0)
      w$state(label="average", end=TRUE)     # v=(4,1)
      w$state(label="not working", end=TRUE) # v=(4,2)
      w$state(label="replaced", end=TRUE)    # v=(4,3)
   w$endStage()
w$endProcess()
w$closeWriter()

## Load the model into memory
mdp<-loadMDP(prefix)
mdp
plot(mdp)

getInfo(mdp, withList = FALSE)
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
scrapValues<-c(30,10,5,0)   # scrap values (the values of the 4 states at stage 4)
runValueIte(mdp, w, termValues=scrapValues)
getPolicy(mdp)     # optimal policy

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  



# The example given in L.R. Nielsen and A.R. Kristensen. Finding the K best
# policies in a finite-horizon Markov decision process. European Journal of
# Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011,
# does actually not have any dummy replacement node as in the MDP above. The same
# model can be created using a single dummy node at the end of the process.

## Create the MDP using a single dummy node
prefix<-"machine2_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE) # transition to sId=12 (Dummy)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good")        # v=(4,0)
         w$action(label="rep", weights=30, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="average")     # v=(4,1)
         w$action(label="rep", weights=10, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="not working") # v=(4,2)
         w$action(label="rep", weights=5, prob=c(1,0,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=5
      w$state(label="Dummy", end=TRUE)        # v=(5,0)
   w$endStage()
w$endProcess()
w$closeWriter()

## Have a look at the state-expanded hypergraph
mdp<-loadMDP(prefix)
mdp
plot(mdp)

getInfo(mdp, withList = FALSE)
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
runValueIte(mdp, w, termValues = 0)
getPolicy(mdp)     # optimal policy

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  


## Reset working dir
setwd(wd)

Run the code above in your browser using DataLab