# NOT RUN {
####################
# Data preprocessing
# Example unemployment data
library(Ecdat)
data(UnempDur)
# Select subsample
selectInd1 <- 1:100
selectInd2 <- 101:200
trainSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd1], ]
valSet <- UnempDur[which(UnempDur$spell %in% (1:10))[selectInd2], ]
# Convert to long format
trainSet_long <- dataLongSubDist(dataShort = trainSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
valSet_long <- dataLongSubDist(dataShort = valSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
# Compute new weights of the validation data set
valSet_long$subDistWeights <- weightsLtoT(trainSet, valSet, timeColumn = "spell",
eventColumns = c("censor1", "censor4"), eventFocus = "censor1")
# Estimate continuation ratio model with logit link
glmFit <- glm(formula = y ~ timeInt + age + logwage, data = trainSet_long,
family = binomial(), weights = trainSet_long$subDistWeights)
# Calculate predicted discrete hazards
predHazards <- predict(glmFit, newdata = valSet_long, type = "response")
# Calibration plot
calibrationPlot(predHazards, testDataLong = valSet_long, weights = valSet_long$subDistWeights)
# }
Run the code above in your browser using DataLab