# NOT RUN {
# Example with unemployment data
library(Ecdat)
data(UnempDur)
# Select subsample
SubUnempDur <- UnempDur [1:100, ]
# Convert competing risk data to long format
SubUnempDurLong <- dataLongCompRisks (dataShort = SubUnempDur, timeColumn = "spell",
eventColumns = c("censor1", "censor2", "censor3", "censor4"))
head(SubUnempDurLong, 20)
# Fit multinomial logit model with VGAM package
# with one coefficient per response
library(VGAM)
multLogitVGM <- vgam(cbind(e0, e1, e2, e3, e4) ~ timeInt + ui + age + logwage,
family = multinomial(refLevel = 1),
data = SubUnempDurLong)
coef(multLogitVGM)
# Alternative: Use nnet
# Convert response to factor
rawResponseMat <- SubUnempDurLong[, c("e0", "e1", "e2", "e3", "e4")]
NewFactor <- factor(unname(apply(rawResponseMat, 1, function(x) which(x == 1))),
labels = colnames(rawResponseMat))
# Include recoded response in data
SubUnempDurLong <- cbind(SubUnempDurLong, NewResp = NewFactor)
# Construct formula of mlogit model
mlogitFormula <- formula(NewResp ~ timeInt + ui + age + logwage)
# Fit multinomial logit model
# with one coefficient per response
library(nnet)
multLogitNNET <- multinom(formula = mlogitFormula, data = SubUnempDurLong)
coef(multLogitNNET)
###########################################################
# Simulation
# Cause specific competing risks in case of right-censoring
# Discrete subdistribution hazards model
# Simulate covariates as multivariate normal distribution
library(mvnfast)
set.seed(1980)
X <- mvnfast::rmvn(n = 1000, mu = rep(0, 4), sigma = diag(4))
# Specification of two discrete cause specific hazards with four intervals
# Event 1
theoInterval <- 4
betaCoef_event1 <- seq(-1, 1, length.out = 5)[-3]
timeInt_event1 <- seq(0.1, -0.1, length.out = theoInterval-1)
linPred_event1 <- c(X %*% betaCoef_event1)
# Event 2
betaCoef_event2 <- seq(-0.5, 0.5, length.out = 5)[-3]
timeInt_event2 <- seq(-0.1, 0.1, length.out = theoInterval-1)
linPred_event2 <- c(X %*% betaCoef_event2)
# Discrete cause specific hazards in last theoretical interval
theoHaz_event1 <- 0.5
theoHaz_event2 <- 0.5
haz_event1_X <- cbind(sapply(1:length(timeInt_event1),
function(x) exp(linPred_event1 + timeInt_event1[x]) /
(1 + exp(linPred_event1 + timeInt_event1[x]) +
exp(linPred_event2 + timeInt_event2[x])) ), theoHaz_event1)
haz_event2_X <- cbind(sapply(1:length(timeInt_event2),
function(x) exp(linPred_event2 + timeInt_event2[x]) /
(1 + exp(linPred_event1 + timeInt_event1[x]) +
exp(linPred_event2 + timeInt_event2[x]) ) ), theoHaz_event2)
allCauseHaz_X <- haz_event1_X + haz_event2_X
pT_X <- t(sapply(1:dim(allCauseHaz_X)[1], function(i) estMargProb(allCauseHaz_X[i, ]) ))
pR_T_X_event1 <- haz_event1_X / (haz_event1_X + haz_event2_X)
survT <- sapply(1:dim(pT_X)[1], function(i) sample(x = 1:(length(timeInt_event1) + 1),
size = 1, prob = pT_X[i, ]) )
censT <- sample(x = 1:(length(timeInt_event1)+1), size = dim(pT_X)[1],
prob = rep(1/(length(timeInt_event1) + 1), (length(timeInt_event1) + 1)),
replace = TRUE)
obsT <- ifelse(survT <= censT, survT, censT)
obsEvent <- rep(0, length(obsT))
obsEvent <- sapply(1:length(obsT),
function(i) if(survT[i] <= censT[i]){
return(sample(x = c(1, 2), size=1,
prob = c(pR_T_X_event1[i, obsT[i] ],
1 - pR_T_X_event1[i, obsT[i] ]) ))
} else{
return(0)
}
)
# Recode last interval to censored
lastInterval <- obsT == theoInterval
obsT[lastInterval] <- theoInterval - 1
obsEvent[lastInterval] <- 0
obsT <- factor(obsT)
obsEvent <- factor(obsEvent)
datShort <- data.frame(event = factor(obsEvent), time = obsT, X)
datLong <- dataLongCompRisks(dataShort = datShort, timeColumn = "time",
eventColumns = "event", responseAsFactor = TRUE,
eventColumnsAsFactor = TRUE, timeAsFactor = TRUE)
# Estimate discrete cause specific hazard model
library(VGAM)
estModel <- vglm(formula=responses ~ timeInt + X1 + X2 + X3 + X4, data=datLong,
family = multinomial(refLevel = 1))
# Mean squared errors per event
coefModels <- coef(estModel)
mean((coefModels[seq(7, length(coefModels), 2)] - betaCoef_event1)^2) # Event 1
mean((coefModels[seq(8, length(coefModels), 2)] - betaCoef_event2)^2) # Event 2
# -> Estimated coefficients are near true coefficients for each event type
# }
Run the code above in your browser using DataLab