# NOT RUN {
####################
# Data preprocessing
# Example unemployment data
library(Ecdat)
data(UnempDur)
# Select subsample
selectInd1 <- 1:100
selectInd2 <- 101:200
trainSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd1], ]
valSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd2], ]
####################
# One event
# Convert to long format
trainSet_long <- dataLong(dataShort = trainSet, timeColumn = "spell", eventColumn = "censor1")
valSet_long <- dataLong(dataShort = valSet, timeColumn = "spell", eventColumn = "censor1")
# Estimate continuation ratio model with logit link
glmFit <- glm(formula = y ~ timeInt + age + logwage, data = trainSet_long, family = binomial())
# Calculate linear predictors on validation set
linPred <- predict(glmFit, newdata = valSet_long, type = "link")
# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long)
summary(recalModel)
# Calibration plots
hazOrg <- predict(glmFit, newdata = valSet_long, type = "response")
hazRecal <- predict(recalModel, newdata = data.frame(linPred), type = "response")
# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long)
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long)
############################
# Two cause specific hazards
library(VGAM)
# Convert to long format
trainSet_long <- dataLongCompRisks(dataShort = trainSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"))
valSet_long <- dataLongCompRisks(dataShort = valSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"))
# Estimate continuation ratio model with logit link
vglmFit <- VGAM::vglm(formula = cbind(e0, e1, e2) ~ timeInt + age + logwage, data = trainSet_long,
family = VGAM::multinomial(refLevel = "e0"))
# Calculate linear predictors on training and test set
linPred <- VGAM::predictvglm(vglmFit, newdata = valSet_long, type = "link")
# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long)
recalModel
# Calibration plots
hazOrg <- predict(vglmFit, newdata = valSet_long, type = "response")
predDat <- as.data.frame(linPred)
names(predDat) <- recalModel@misc$colnames.x[-1]
hazRecal <- predictvglm(recalModel, newdata = predDat, type = "response")
# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long, event = "e1")
calibrationPlot(hazOrg, testDataLong = valSet_long, event = "e2")
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long, event = "e1")
calibrationPlot(hazRecal, testDataLong = valSet_long, event = "e2")
###############################
# Subdistribution hazards model
# Convert to long format
trainSet_long <- dataLongSubDist(dataShort = trainSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
valSet_long <- dataLongSubDist(dataShort = valSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
# Estimate continuation ratio model with logit link
glmFit <- glm(formula = y ~ timeInt + age + logwage, data = trainSet_long,
family = binomial(), weights = trainSet_long$subDistWeights)
# Calculate linear predictors on training and test set
linPred <- predict(glmFit, newdata = valSet_long, type = "link")
# Estimate logistic recalibration model
recalModel <- estRecal(testLinPred = linPred, testDataLong = valSet_long,
weights = valSet_long$subDistWeights)
recalModel
# Calibration plots
hazOrg <- predict(glmFit, newdata = valSet_long, type = "response",
weights = valSet_long$subDistWeights)
hazRecal <- predict(recalModel, newdata = data.frame(linPred), type = "response",
weights = valSet_long$subDistWeights)
# Before logistic recalibration
calibrationPlot(hazOrg, testDataLong = valSet_long,
weights=valSet_long$subDistWeights)
# After logistic recalibration
calibrationPlot(hazRecal, testDataLong = valSet_long,
weights=valSet_long$subDistWeights)
# }
Run the code above in your browser using DataLab