# NOT RUN {
family <- "gaussian" #"poisson"
delta = 1 # moderate main effect
s=2 # if s=2 (s=1), a nonlinear (linear) contrast function
n=500 # number of subjects
p=10 # number of pretreatment covariates
# generate training data
data <- generate.data(n= n, p=p, delta = delta, s= s, family = family)
data$SNR # the ratio of interactions("signal") vs. main effects("noise")
A <- data$A
y <- data$y
X <- data$X
# generate testing data
data.test <- generate.data(n=10^5, p=p, delta = delta, s= s, family = family)
A.test <- data.test$A
y.test <- data.test$y
X.test <- data.test$X
data.test$value.opt # the optimal "value"
# fit SIMML
#1) SIMML without X main effect
simml.obj1 <- simml(y, A, X, family = family)
#2) SIMML with X main effect (estimation efficiency for the g term of SIMML can be improved)
simml.obj2 <- simml(y, A, X, Xm = X, family = family)
# apply the estimated SIMML to the testing set and obtain treatment assignment rules.
simml.trt.rule1 <- pred.simml(simml.obj1, newX= X.test)$trt.rule
# "value" estimation (estimated by IPWE)
simml.value1 <- mean(y.test[simml.trt.rule1 == A.test])
simml.value1
simml.trt.rule2 <- pred.simml(simml.obj2, newX= X.test)$trt.rule
simml.value2 <- mean(y.test[simml.trt.rule2 == A.test])
simml.value2
# compare these to the optimal "value"
data.test$value.opt
# fit MC (modified covariates) model of Tien et al 2014
n.A <- summary(as.factor(A)); pi.A <- n.A/sum(n.A)
mc <- (as.numeric(A) + pi.A[1] -2) *cbind(1, X) # 0.5*(-1)^as.numeric(A) *cbind(1, X)
mc.coef <- coef(glm(y ~ mc, family = family))
mc.trt.rule <- (cbind(1, X.test) %*% mc.coef[-1] > 0) +1
# "value" estimation (estimated by IPWE)
mc.value <- mean(y.test[mc.trt.rule == A.test])
mc.value
# visualization of the estimated link functions of SIMML
simml.obj1$beta.coef # estimated single-index coefficients
g.fit <- simml.obj1$g.fit # estimated trt-specific link functions; "g.fit" is a mgcv::gam object.
#plot(g.fit)
# }
# NOT RUN {
# can improve visualization by using the package "mgcViz"
#install.packages("mgcViz")
# mgcViz depends on "rgl". "rgl" depends on XQuartz, which you can download from xquartz.org
#library(mgcViz)
# transform the "mgcv::gam" object to a "mgcViz" object (to improve visualization)
g.fit <- getViz(g.fit)
plot1 <- plot( sm(g.fit,1) ) # for treatment group 1
plot1 + l_fitLine(colour = "red") + l_rug(mapping = aes(x=x, y=y), alpha = 0.8) +
l_ciLine(mul = 5, colour = "blue", linetype = 2) +
l_points(shape = 19, size = 1, alpha = 0.1) +
xlab(expression(paste("z = ", alpha*minute, "x"))) + ylab("y") +
ggtitle("Treatment group 1 (Tr =1)") + theme_classic()
plot2 <- plot( sm(g.fit,2) ) # for treatment group 2
plot2 + l_fitLine(colour = "red") + l_rug(mapping = aes(x=x, y=y), alpha = 0.8) +
l_ciLine(mul = 5, colour = "blue", linetype = 2) +
l_points(shape = 19, size = 1, alpha = 0.1) +
xlab(expression(paste("z = ", alpha*minute, "x"))) +ylab("y") +
ggtitle("Treatment group 2 (Tr =2)") + theme_classic()
trans = function(x) x + g.fit$coefficients[2]
plotDiff(s1 = sm(g.fit, 2), s2 = sm(g.fit, 1), trans=trans) + l_ciPoly() +
l_fitLine() + geom_hline(yintercept = 0, linetype = 2) +
xlab(expression(paste("z = ", alpha*minute, "x")) ) +
ylab("(Treatment 2 effect) - (Treatment 1 effect)") +
ggtitle("Contrast between two treatment effects") +
theme_classic()
# yet another way of visualization, using ggplot2
#library(ggplot2)
dat <- data.frame(y= simml.obj1$g.fit$model$y,
x= simml.obj1$g.fit$model$single.index,
Treatment= simml.obj1$g.fit$model$Tr)
g.plot<- ggplot(dat, aes(x=x,y=y,color=Treatment,shape=Treatment,linetype=Treatment))+
geom_point(aes(color=Treatment, shape=Treatment), size=1, fill="white") +
scale_colour_brewer(palette="Set1", direction=-1) + theme_classic() +
xlab(expression(paste(alpha*minute,"x"))) + ylab("y")
g.plot + geom_smooth(method=gam, formula= y~ s(x, bs=simml.obj1$bs, k=simml.obj1$k),
se=TRUE, fullrange=TRUE, alpha = 0.35)
# }
# NOT RUN {
# }
# NOT RUN {
# can obtain bootstrap CIs for beta.coef.
simml.obj <- simml(y,Tr,X,Xm=X, family=family,bootstrap=TRUE,nboot=15) #nboot=500.
simml.obj$beta.coef
round(simml.obj$boot.ci,3)
# compare the estimates to the true beta.coef.
data$true.beta
# }
# NOT RUN {
# an application to data with ordinal categorical response
dat <- ordinal.data(n=500, p=5, R = 11, # 11 response levels
s = "nonlinear", # nonlinear interactions
delta = 1)
dat$SNR
y <- dat$y # ordinal response
X <- dat$X # X matrix
A <- dat$A # treatment
dat$true.beta # the "true" single-index coefficient
# 1) fit a cumulative logit simml, with a flexible link function
res <- simml(y,A,X, family="ordinal", R=11)
res$beta.coef # single-index coefficients.
res$g.fit$family$getTheta(TRUE) # the estimated R-1 threshold values.
# 2) fit a cumulative logit simml, with a linear link function
res2 <- simml(y,A,X, family="ordinal", R=11, linear.link = TRUE)
res2$beta.coef # single-index coefficients.
# }
# NOT RUN {
family = ocat(R=11) # ocat: ordered categorical response family, with R categories.
# the treatment A's effect.
tmp <- mgcv::gam(y ~ A, family = family)
exp(coef(tmp)[2]) #odds ratio (OR) comparing treatment A=2 vs. A=1.
ind2 <- pred.simml(res)$trt.rule ==2 # subgroup recommended with A=2 under SIMML ITR
tmp2 <- mgcv::gam(y[ind2] ~ A[ind2], family = family)
exp(coef(tmp2)[2]) #OR comparing treatment A=2 vs. A=1, for subgroup recommended with A=2
ind1 <- pred.simml(res)$trt.rule ==1 # subgroup recommended with A=1 under SIMML ITR
tmp1 <- mgcv::gam(y[ind1] ~ A[ind1], family = family)
exp(coef(tmp1)[2]) #OR comparing treatment A=2 vs. A=1, for subgroup recommended with A=2
# }
Run the code above in your browser using DataCamp Workspace