PF_EM: EM Estimation with Particle Filters and Smoothers

Description

Method to estimate the hyper parameters with an EM algorithm.

Usage

PF_EM(formula, data, model = "logit", by, max_T, id, a_0, Q_0, Q, order = 1,
  control = PF_control(...), trace = 0, seed = NULL, ...)

Arguments

formula

coxph like formula with Surv(tstart, tstop, event) on the left hand site of ~.

data

data.frame or environment containing the outcome and co-variates.

model

either 'logit' for binary outcomes or 'exponential' for piecewise constant exponential distributed arrival times.

interval length of the bins in which parameters are fixed.

max_T

end of the last interval interval.

vector of ids for each row of the in the design matrix.

a_0

vector \(a_0\) for the initial coefficient vector for the first iteration (optional). Default is estimates from static model (see static_glm).

Q_0

covariance matrix for the prior distribution.

initial covariance matrix for the state equation.

order

order of the random walk.

control

list of control variables (see the control section below).

trace

argument to get progress information. Zero will yield no info and larger integer values will yield incrementally more information.

seed

seed to set at the start of every EM iteration.

...

optional way to pass arguments to control.

Value

An object of class PF_EM.

Details

See vignette("Particle_filtering", "dynamichazard") for details.

Examples

Run this code

# NOT RUN {
#####
# Fit model with lung data set from survival
# Warning: this has a longer computation time

# }
# NOT RUN {
library(dynamichazard)
.lung <- lung[!is.na(lung$ph.ecog), ]
set.seed(43588155)
pf_fit <- PF_EM(
 Surv(time, status == 2) ~ ph.ecog + age,
 data = .lung, by = 50, id = 1:nrow(.lung),
 Q_0 = diag(1, 3), Q = diag(1, 3),
 max_T = 800,
 control = PF_control(
   N_fw_n_bw = 500,
   N_first = 2500,
   N_smooth = 2500,
   n_max = 50,
   n_threads = max(parallel::detectCores(logical = FALSE), 1)),
 trace = 1)

# Plot state vector estimates
plot(pf_fit, cov_index = 1)
plot(pf_fit, cov_index = 2)
plot(pf_fit, cov_index = 3)

# Plot log-likelihood
plot(pf_fit$log_likes)
# }
# NOT RUN {
#####
# Can be compared with this example from ?coxph in R 3.4.1. Though, the above
# only has a linear effect for age

# }
# NOT RUN {
cox <- coxph(
 Surv(time, status) ~ ph.ecog + tt(age), data= .lung,
 tt=function(x,t,...) pspline(x + t/365.25))
cox
# }
# NOT RUN {

# }
# NOT RUN {
######
# example with fixed effects

# prepare data
temp <- subset(pbc, id <= 312, select=c(id, sex, time, status, edema, age))
pbc2 <- tmerge(temp, temp, id=id, death = event(time, status))
pbc2 <- tmerge(pbc2, pbcseq, id=id, albumin = tdc(day, albumin),
              protime = tdc(day, protime), bili = tdc(day, bili))
pbc2 <- pbc2[, c("id", "tstart", "tstop", "death", "sex", "edema",
                "age", "albumin", "protime", "bili")]
pbc2 <- within(pbc2, {
 log_albumin <- log(albumin)
 log_protime <- log(protime)
 log_bili <- log(bili)
})

# standardize
for(c. in c("age", "log_albumin", "log_protime", "log_bili"))
 pbc2[[c.]] <- drop(scale(pbc2[[c.]]))

# fit model with extended Kalman filter
ddfit <- ddhazard(
 Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
   ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
 pbc2, Q_0 = 100, Q = 1e-2, by = 100, id = pbc2$id,
 model = "exponential", max_T = 3600,
 control = list(eps = 1e-5, NR_eps = 1e-4, n_max = 1e4))
summary(ddfit)

# fit model with particle filter
set.seed(88235076)
ppfit <- PF_EM(
 Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
   ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
 pbc2, Q_0 = 100, Q = ddfit$Q * 100, # use estimate from before
 by = 100, id = pbc2$id,
 model = "exponential", max_T = 3600,
 control = PF_control(
   N_fw_n_bw = 250, N_smooth = 500, N_first = 1000, eps = 1e-3,
   method = "AUX_normal_approx_w_cloud_mean",
   n_max = 25, # just take a few iterations as an example
   n_threads = max(parallel::detectCores(logical = FALSE), 1), trace = TRUE)

# compare results
plot(ddfit)
plot(ppfit)
sqrt(ddfit$Q * 100)
sqrt(ppfit$Q)
rbind(ddfit$fixed_effects, ppfit$fixed_effects)
# }

Run the code above in your browser using DataLab