##################################################
# Example with unemployment data and prior fitting
library(Ecdat)
library(caret)
library(mgcv)
data(UnempDur)
summary(UnempDur$spell)
# Extract subset of data
set.seed(635)
IDsample <- sample(1:dim(UnempDur)[1], 100)
UnempDurSubset <- UnempDur [IDsample, ]
set.seed(-570)
TrainingSample <- sample(1:100, 75)
UnempDurSubsetTrain <- UnempDurSubset [TrainingSample, ]
UnempDurSubsetTest <- UnempDurSubset [-TrainingSample, ]
# Convert to long format
UnempDurSubsetTrainLong <- dataLong(dataSet=UnempDurSubsetTrain,
timeColumn="spell", censColumn="censor1")
# Estimate gam with smooth baseline
gamFit <- gam(formula=y ~ s(I(as.numeric(as.character(timeInt)))) +
s(age) + s(logwage), data=UnempDurSubsetTrainLong, family=binomial())
gamFitPreds <- predict(gamFit, newdata=cbind(UnempDurSubsetTest,
timeInt=UnempDurSubsetTest$spell))
# Estimate tpr given one training and one test sample
tprGamFit <- tprUnoShort (timepoint=1, marker=gamFitPreds,
newTime=UnempDurSubsetTest$spell, newEvent=UnempDurSubsetTest$censor1,
trainTime=UnempDurSubsetTrain$spell, trainEvent=UnempDurSubsetTrain$censor1)
plot(tprGamFit)
#####################################
# Example National Wilm's Tumor Study
library(survival)
head(nwtco)
summary(nwtco$rel)
# Select subset
set.seed(-375)
Indices <- sample(1:dim(nwtco)[1], 500)
nwtcoSub <- nwtco [Indices, ]
# Convert time range to 30 intervals
intLim <- quantile(nwtcoSub$edrel, prob=seq(0, 1, length.out=30))
intLim [length(intLim)] <- intLim [length(intLim)] + 1
nwtcoSubTemp <- contToDisc(dataSet=nwtcoSub, timeColumn="edrel", intervalLimits=intLim)
nwtcoSubTemp$instit <- factor(nwtcoSubTemp$instit)
nwtcoSubTemp$histol <- factor(nwtcoSubTemp$histol)
nwtcoSubTemp$stage <- factor(nwtcoSubTemp$stage)
# Split in training and test sample
set.seed(-570)
TrainingSample <- sample(1:dim(nwtcoSubTemp)[1], round(dim(nwtcoSubTemp)[1]*0.75))
nwtcoSubTempTrain <- nwtcoSubTemp [TrainingSample, ]
nwtcoSubTempTest <- nwtcoSubTemp [-TrainingSample, ]
# Convert to long format
nwtcoSubTempTrainLong <- dataLong(dataSet=nwtcoSubTempTrain,
timeColumn="timeDisc", censColumn="rel")
# Estimate glm
inputFormula <- y ~ timeInt + histol + instit + stage
glmFit <- glm(formula=inputFormula, data=nwtcoSubTempTrainLong, family=binomial())
linPreds <- predict(glmFit, newdata=cbind(nwtcoSubTempTest,
timeInt=nwtcoSubTempTest$timeDisc))
# Estimate tpr given one training and one test sample at time interval 10
tprFit <- tprUnoShort (timepoint=10, marker=linPreds,
newTime=nwtcoSubTempTest$timeDisc, newEvent=nwtcoSubTempTest$rel,
trainTime=nwtcoSubTempTrain$timeDisc, trainEvent=nwtcoSubTempTrain$rel)
plot(tprFit)
Run the code above in your browser using DataLab