# NOT RUN {
##################################################
# Example with unemployment data and prior fitting
library(Ecdat)
library(caret)
library(mgcv)
data(UnempDur)
summary(UnempDur$spell)
# Extract subset of data
set.seed(635)
IDsample <- sample(1:dim(UnempDur)[1], 100)
UnempDurSubset <- UnempDur [IDsample, ]
set.seed(-570)
TrainingSample <- sample(1:100, 75)
UnempDurSubsetTrain <- UnempDurSubset [TrainingSample, ]
UnempDurSubsetTest <- UnempDurSubset [-TrainingSample, ]
# Convert to long format
UnempDurSubsetTrainLong <- dataLong(dataShort = UnempDurSubsetTrain,
timeColumn = "spell", eventColumn = "censor1")
# Estimate gam with smooth baseline
gamFit <- gam(formula = y ~ s(I(as.numeric(as.character(timeInt)))) +
s(age) + s(logwage), data = UnempDurSubsetTrainLong, family = binomial())
gamFitPreds <- predict(gamFit, newdata = cbind(UnempDurSubsetTest,
timeInt = UnempDurSubsetTest$spell))
# Evaluate C-Index based on short data format
cIndex(marker = gamFitPreds,
testTime = UnempDurSubsetTest$spell,
testEvent = UnempDurSubsetTest$censor1,
trainTime = UnempDurSubsetTrain$spell,
trainEvent = UnempDurSubsetTrain$censor1)
#####################################
# Example National Wilm's Tumor Study
library(survival)
head(nwtco)
summary(nwtco$rel)
# Select subset
set.seed(-375)
Indices <- sample(1:dim(nwtco)[1], 500)
nwtcoSub <- nwtco [Indices, ]
# Convert time range to 30 intervals
intLim <- quantile(nwtcoSub$edrel, prob = seq(0, 1, length.out = 30))
intLim [length(intLim)] <- intLim [length(intLim)] + 1
nwtcoSubTemp <- contToDisc(dataShort = nwtcoSub, timeColumn = "edrel", intervalLimits = intLim)
nwtcoSubTemp$instit <- factor(nwtcoSubTemp$instit)
nwtcoSubTemp$histol <- factor(nwtcoSubTemp$histol)
nwtcoSubTemp$stage <- factor(nwtcoSubTemp$stage)
# Split in training and test sample
set.seed(-570)
TrainingSample <- sample(1:dim(nwtcoSubTemp)[1], round(dim(nwtcoSubTemp)[1]*0.75))
nwtcoSubTempTrain <- nwtcoSubTemp [TrainingSample, ]
nwtcoSubTempTest <- nwtcoSubTemp [-TrainingSample, ]
# Convert to long format
nwtcoSubTempTrainLong <- dataLong(dataShort = nwtcoSubTempTrain,
timeColumn = "timeDisc", eventColumn = "rel", timeAsFactor=TRUE)
# Estimate glm
inputFormula <- y ~ timeInt + histol + instit + stage
glmFit <- glm(formula = inputFormula, data = nwtcoSubTempTrainLong, family = binomial())
linPreds <- predict(glmFit, newdata = cbind(nwtcoSubTempTest,
timeInt = factor(nwtcoSubTempTest$timeDisc, levels=levels(nwtcoSubTempTrainLong$timeInt))))
# Evaluate C-Index based on short data format
cIndex(marker = linPreds,
testTime = as.numeric(as.character(nwtcoSubTempTest$timeDisc)),
testEvent = nwtcoSubTempTest$rel,
trainTime = as.numeric(as.character(nwtcoSubTempTrain$timeDisc)),
trainEvent = nwtcoSubTempTrain$rel)
# }
Run the code above in your browser using DataLab