estRecal: Logistic recalibration based on linear predictors

Description

Fits a logistic recalibration model to independent test data. It updates the intercept and slope parameters. It is assumed that the factor levels of time are equal in both training and validation data. Time dependent covariates, discrete cause specific competing risks and subdistribution hazards are also supported.

Usage

estRecal(testLinPred, testDataLong, weights = NULL)

Arguments

testLinPred

Calculated linear predictor on the validation data with model fitted on training data ("numeric vector").

testDataLong

Validation data set in long format ("class data.frame").

weights

Weights used in estimation of the logistic recalibration model ("numeric vector"). Default is no weighting (NULL).

Value

Continuation ratio model that calibrates estimated discrete hazards to new validation environment ("class glm, lm").

Details

Updates estimated hazards of discrete survival models to better adapt to a different environment. If there are substantial environment changes the predicted probabilities will differ between two environments. Logistic recalibration may be used to improve the calibration of predicted probabilities by incorperating information from the existing model and data from the environment. This approach works for any survival prediction model with one event that provides linear predictors.

References

heyardValCompRisksdiscSurv

Examples

Run this code

# NOT RUN {
####################
# Data preprocessing

# Example unemployment data
library(Ecdat)
data(UnempDur)

# Select subsample
selectInd1 <- 1:100
selectInd2 <- 101:200
trainSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd1], ]
valSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd2], ]

####################
# One event

# Convert to long format
trainSet_long <- dataLong(dataShort = trainSet, timeColumn = "spell", eventColumn = "censor1")
valSet_long <- dataLong(dataShort = valSet, timeColumn = "spell", eventColumn = "censor1")

# Estimate continuation ratio model with logit link
glmFit <- glm(formula = y ~ timeInt + age + logwage, data = trainSet_long, family = binomial())

# Calculate linear predictors on validation set
linPred <- predict(glmFit, newdata = valSet_long, type = "link")

# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long)
summary(recalModel)

# Calibration plots
hazOrg <- predict(glmFit, newdata = valSet_long, type = "response")
hazRecal <- predict(recalModel, newdata = data.frame(linPred), type = "response")

# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long)
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long)

############################
# Two cause specific hazards 
library(VGAM)

# Convert to long format
trainSet_long <- dataLongCompRisks(dataShort = trainSet, timeColumn = "spell", 
eventColumns = c("censor1", "censor4"))
valSet_long <- dataLongCompRisks(dataShort = valSet, timeColumn = "spell", 
eventColumns = c("censor1", "censor4"))

# Estimate continuation ratio model with logit link
vglmFit <- VGAM::vglm(formula = cbind(e0, e1, e2) ~ timeInt + age + logwage, data = trainSet_long, 
family = VGAM::multinomial(refLevel = "e0"))

# Calculate linear predictors on training and test set
linPred <- VGAM::predictvglm(vglmFit, newdata = valSet_long, type = "link")

# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long)
recalModel

# Calibration plots
hazOrg <- predict(vglmFit, newdata = valSet_long, type = "response")
predDat <- as.data.frame(linPred)
names(predDat) <- recalModel@misc$colnames.x[-1]
hazRecal <- predictvglm(recalModel, newdata = predDat, type = "response")

# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long, event = "e1")
calibrationPlot(hazOrg, testDataLong = valSet_long, event = "e2")
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long, event = "e1")
calibrationPlot(hazRecal, testDataLong = valSet_long, event = "e2")

###############################
# Subdistribution hazards model

# Convert to long format
trainSet_long <- dataLongSubDist(dataShort = trainSet, timeColumn = "spell", 
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
valSet_long <- dataLongSubDist(dataShort = valSet, timeColumn = "spell", 
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")

# Estimate continuation ratio model with logit link
glmFit <- glm(formula = y ~ timeInt + age + logwage, data = trainSet_long, 
family = binomial(), weights = trainSet_long$subDistWeights)

# Calculate linear predictors on training and test set
linPred <- predict(glmFit, newdata = valSet_long, type = "link")

# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long, 
weights = valSet_long$subDistWeights)
recalModel

# Calibration plots
hazOrg <- predict(glmFit, newdata = valSet_long, type = "response",
weights = valSet_long$subDistWeights)
hazRecal <- predict(recalModel, newdata = data.frame(linPred), type = "response",
weights = valSet_long$subDistWeights)

# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long,
weights=valSet_long$subDistWeights)
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long,
weights=valSet_long$subDistWeights)

# }

Run the code above in your browser using DataLab