cIndex: Concordance index

Description

Calculates the concordance index for discrete survival models, which does not depend on time. This is the probability that, for a pair of randomly chosen comparable samples, the sample with the higher risk prediction will experience an event before the other sample or belongs to a higher binary class.

Usage

cIndex(marker, testTime, testEvent, trainTime, trainEvent)

Arguments

marker

Gives the predicted values of the linear predictor of a regression model ("numeric vector"). May also be on the response scale.

testTime

New time intervals in the test data ("integer vector").

testEvent

Event indicators in the test data ("binary vector").

trainTime

Time intervals in the training data ("integer vector").

trainEvent

Event indicators in the training data ("binary vector").

Value

Value of discrete concordance index between zero and one ("numeric vector").

References

schmidDiscMeasurediscSurv unoEvalPreddiscSurv heagertySurvROCdiscSurv

Examples

Run this code

# NOT RUN {
##################################################
# Example with unemployment data and prior fitting

library(Ecdat)
library(caret)
library(mgcv)
data(UnempDur)
summary(UnempDur$spell)
# Extract subset of data
set.seed(635)
IDsample <- sample(1:dim(UnempDur)[1], 100)
UnempDurSubset <- UnempDur [IDsample, ]
set.seed(-570)
TrainingSample <- sample(1:100, 75)
UnempDurSubsetTrain <- UnempDurSubset [TrainingSample, ]
UnempDurSubsetTest <- UnempDurSubset [-TrainingSample, ]

# Convert to long format
UnempDurSubsetTrainLong <- dataLong(dataShort = UnempDurSubsetTrain, 
timeColumn = "spell", eventColumn = "censor1")

# Estimate gam with smooth baseline
gamFit <- gam(formula = y ~ s(I(as.numeric(as.character(timeInt)))) + 
s(age) + s(logwage), data = UnempDurSubsetTrainLong, family = binomial())
gamFitPreds <- predict(gamFit, newdata = cbind(UnempDurSubsetTest, 
timeInt = UnempDurSubsetTest$spell))

# Evaluate C-Index based on short data format
cIndex(marker = gamFitPreds, 
testTime = UnempDurSubsetTest$spell, 
testEvent = UnempDurSubsetTest$censor1, 
trainTime = UnempDurSubsetTrain$spell, 
trainEvent = UnempDurSubsetTrain$censor1)

#####################################
# Example National Wilm's Tumor Study

library(survival)
head(nwtco)
summary(nwtco$rel)

# Select subset
set.seed(-375)
Indices <- sample(1:dim(nwtco)[1], 500)
nwtcoSub <- nwtco [Indices, ]

# Convert time range to 30 intervals
intLim <- quantile(nwtcoSub$edrel, prob = seq(0, 1, length.out = 30))
intLim [length(intLim)] <- intLim [length(intLim)] + 1
nwtcoSubTemp <- contToDisc(dataShort = nwtcoSub, timeColumn = "edrel", intervalLimits = intLim)
nwtcoSubTemp$instit <- factor(nwtcoSubTemp$instit)
nwtcoSubTemp$histol <- factor(nwtcoSubTemp$histol)
nwtcoSubTemp$stage <- factor(nwtcoSubTemp$stage)

# Split in training and test sample
set.seed(-570)
TrainingSample <- sample(1:dim(nwtcoSubTemp)[1], round(dim(nwtcoSubTemp)[1]*0.75))
nwtcoSubTempTrain <- nwtcoSubTemp [TrainingSample, ]
nwtcoSubTempTest <- nwtcoSubTemp [-TrainingSample, ]

# Convert to long format
nwtcoSubTempTrainLong <- dataLong(dataShort = nwtcoSubTempTrain, 
timeColumn = "timeDisc", eventColumn = "rel", timeAsFactor=TRUE)

# Estimate glm
inputFormula <- y ~ timeInt + histol + instit + stage
glmFit <- glm(formula = inputFormula, data = nwtcoSubTempTrainLong, family = binomial())
linPreds <- predict(glmFit, newdata = cbind(nwtcoSubTempTest, 
timeInt = factor(nwtcoSubTempTest$timeDisc, levels=levels(nwtcoSubTempTrainLong$timeInt))))

# Evaluate C-Index based on short data format
cIndex(marker = linPreds, 
testTime = as.numeric(as.character(nwtcoSubTempTest$timeDisc)), 
testEvent = nwtcoSubTempTest$rel, 
trainTime = as.numeric(as.character(nwtcoSubTempTrain$timeDisc)), 
trainEvent = nwtcoSubTempTrain$rel) 


# }

Run the code above in your browser using DataLab