# NOT RUN {
# Example unemployment data
library(Ecdat)
data(UnempDur)
# Select subsample
subUnempDur <- UnempDur [1:100, ]
head(subUnempDur)
# Convert to long format
UnempLong <- dataLong (dataShort = subUnempDur, timeColumn = "spell", eventColumn = "censor1")
head(UnempLong, 20)
# Is there exactly one observed event of y for each person?
splitUnempLong <- split(UnempLong, UnempLong$obj)
all(sapply(splitUnempLong, function (x) sum(x$y))==subUnempDur$censor1) # TRUE
# Second example: Acute Myelogenous Leukemia survival data
library(survival)
head(leukemia)
leukLong <- dataLong(dataShort = leukemia, timeColumn = "time",
eventColumn = "status", timeAsFactor=TRUE)
head(leukLong, 30)
# Estimate discrete survival model
estGlm <- glm(formula = y ~ timeInt + x, data=leukLong, family = binomial())
summary(estGlm)
# Estimate survival curves for non-maintained chemotherapy
newDataNonMaintained <- data.frame(timeInt = factor(1:161), x = rep("Nonmaintained"))
predHazNonMain <- predict(estGlm, newdata = newDataNonMaintained, type = "response")
predSurvNonMain <- cumprod(1-predHazNonMain)
# Estimate survival curves for maintained chemotherapy
newDataMaintained <- data.frame(timeInt = factor(1:161), x = rep("Maintained"))
predHazMain <- predict(estGlm, newdata = newDataMaintained, type = "response")
predSurvMain <- cumprod(1-predHazMain)
# Compare survival curves
plot(x = 1:50, y = predSurvMain [1:50], xlab = "Time", ylab = "S(t)", las = 1,
type = "l", main = "Effect of maintained chemotherapy on survival of leukemia patients")
lines(x = 1:161, y = predSurvNonMain, col = "red")
legend("topright", legend = c("Maintained chemotherapy", "Non-maintained chemotherapy"),
col = c("black", "red"), lty = rep(1, 2))
# The maintained therapy has clearly a positive effect on survival over the time range
##############################################
# Simulation
# Single event in case of right-censoring
# Simulate multivariate normal distribution
library(discSurv)
library(mvnfast)
set.seed(-1980)
X <- mvnfast::rmvn(n = 1000, mu = rep(0, 10), sigma = diag(10))
# Specification of discrete hazards with 11 theoretical intervals
betaCoef <- seq(-1, 1, length.out = 11)[-6]
timeInt <- seq(-1, 1, length.out = 10)
linPred <- c(X %*% betaCoef)
hazTimeX <- cbind(sapply(1:length(timeInt),
function(x) exp(linPred+timeInt[x]) / (1+exp(linPred+timeInt[x])) ), 1)
# Simulate discrete survival and censoring times in 10 observed intervals
discT <- rep(NA, dim(hazTimeX)[1])
discC <- rep(NA, dim(hazTimeX)[1])
for( i in 1:dim(hazTimeX)[1] ){
discT[i] <- sample(1:11, size = 1, prob = estMargProb(haz=hazTimeX[i, ]))
discC[i] <- sample(1:11, size = 1, prob = c(rep(1/11, 11)))
}
# Calculate observed times, event indicator and specify short data format
eventInd <- discT <= discC
obsT <- ifelse(eventInd, discT, discC)
eventInd[obsT == 11] <- 0
obsT[obsT == 11] <- 10
simDatShort <- data.frame(obsT = obsT, event = as.numeric(eventInd), X)
# Convert data to discrete data long format
simDatLong <- dataLong(dataShort = simDatShort, timeColumn = "obsT", eventColumn = "event",
timeAsFactor=TRUE)
# Estimate discrete-time continuation ratio model
formSpec <- as.formula(paste("y ~ timeInt + ",
paste(paste("X", 1:10, sep=""), collapse = " + "), sep = ""))
modelFit <- glm(formula = formSpec, data = simDatLong, family = binomial(link = "logit"))
summary(modelFit)
# Compare estimated to true coefficients
coefModel <- coef(modelFit)
MSE_covariates <- mean((coefModel[11:20]-timeInt)^2)
MSE_covariates
# -> Estimated coefficients are near true coefficients
# }
Run the code above in your browser using DataLab