# NOT RUN {
set.seed(1234)
n.test <- 500
## simulation 1
# generate training data
p <- 30
n <- 200
X <- matrix(runif(n*p,-1,1),ncol=p)
A <- runif(n,0,2)
D_opt <- 1 + 0.5*X[,2] + 0.5*X[,1]
mean.fn <- function(X, D_opt, A){ 8 + 4*X[,1] - 2*X[,2] - 2*X[,3] - 25*((D_opt-A)^2) }
mu <- mean.fn(X, D_opt, A)
y <- rnorm(length(mu),mu,1)
# fit SIMSL
simsl.obj <- simsl(y=y, A=A, X=X)
# generate testing data
X.test <- matrix(runif(n.test*p,-1,1),ncol=p)
A.test <- runif(n.test,0,2)
f_opt.test <- 1 + 0.5*X.test[,2] + 0.5*X.test[,1]
pred <- pred.simsl(simsl.obj, newX= X.test) # make prediction based on the estimated SIMSL
value <- mean(8 + 4*X.test[,1] - 2*X.test[,2] - 2*X.test[,3] - 25*((f_opt.test- pred$trt.rule)^2))
value # "value" of the estimated treatment rule; the "oracle" value is 8.
## simulation 2
p <- 10
n <- 400
# generate training data
X <- matrix(runif(n*p,-1,1),ncol=p)
A <- runif(n,0,2)
f_opt <- I(X[,1] > -0.5)*I(X[,1] < 0.5)*0.6 + 1.2*I(X[,1] > 0.5) +
1.2*I(X[,1] < -0.5) + X[,4]^2 + 0.5*log(abs(X[,7])+1) - 0.6
mu <- 8 + 4*cos(2*pi*X[,2]) - 2*X[,4] - 8*X[,5]^3 - 15*abs(f_opt-A)
y <- rnorm(length(mu),mu,1)
Xq <- cbind(X, X^2) # include a quadratic term
# fit SIMSL
simsl.obj <- simsl(y=y, A=A, X=Xq)
# generate testing data
X.test <- matrix(runif(n.test*p,-1,1),ncol=p)
A.test <- runif(n.test,0,2)
f_opt.test <- I(X.test[,1] > -0.5)*I(X.test[,1] < 0.5)*0.6 + 1.2*I(X.test[,1] > 0.5) +
1.2*I(X.test[,1] < -0.5) + X.test[,4]^2 + 0.5*log(abs(X.test[,7])+1) - 0.6
Xq.test <- cbind(X.test, X.test^2)
pred <- pred.simsl(simsl.obj, newX= Xq.test) # make prediction based on the estimated SIMSL
value <- mean(8 + 4*cos(2*pi*X.test[,2]) - 2*X.test[,4] - 8*X.test[,5]^3 -
15*abs(f_opt.test-pred$trt.rule))
value # "value" of the estimated treatment rule; the "oracle" value is 8.
# }
# NOT RUN {
### air pollution data application
data(chicago); head(chicago)
chicago <- chicago[,-3][complete.cases(chicago[,-3]), ]
chicago <- chicago[-c(2856:2859), ] # get rid of the gross outliers in y
chicago <- chicago[-which.max(chicago$pm10median), ] # get rid of the gross outliers in x
# create lagged variables
lagard <- function(x,n.lag=5) {
n <- length(x); X <- matrix(NA,n,n.lag)
for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
X
}
chicago$pm10 <- lagard(chicago$pm10median)
chicago <- chicago[complete.cases(chicago), ]
# create season varaible
chicago$time.day <- round(chicago$time %% 365)
# fit SIMSL for modeling the season-by-pm10 interactions on their effects on outcomes
simsl.obj <- simsl(y=chicago$death, A=chicago$time.day, X=chicago[,7], bs=c("cc","ps"),
ind.to.be.positive = 1, family="poisson", method = "REML",
bootstrap =FALSE) # bootstrap = TRUE
simsl.obj$beta.coef # the estimated single-index coefficients
summary(simsl.obj$g.fit)
#round(simsl.obj$boot.ci,3)
mgcv::vis.gam(simsl.obj$g.fit, view=c("A","single.index"), theta=-135, phi = 30,
color="heat", se=2,ylab = "single-index", zlab = " ",
main=expression(paste("Interaction surface g")))
### Warfarin data application
data(warfarin)
X <- warfarin$X
A <- warfarin$A
y <- -abs(warfarin$INR - 2.5) # the target INR is 2.5
X[,1:3] <- scale(X[,1:3]) # standardize continuous variables
# Estimate the main effect, using an additive model
mu.fit <- mgcv::gam(y-mean(y) ~ X[, 4:13] +
s(X[,1], k=5, bs="ps")+
s(X[,2], k=5, bs="ps") +
s(X[,3], k=5, bs="ps"), method="REML")
summary(mu.fit)
mu.hat <- predict(mu.fit)
# fit SIMSL
simsl.obj <- simsl(y, A, X, Xm= mu.hat, scale.X = FALSE, center.X=FALSE, method="REML",
bootstrap = FALSE) # bootstrap = TRUE
simsl.obj$beta.coef
#round(simsl.obj$boot.ci,3)
mgcv::vis.gam(simsl.obj$g.fit, view=c("A","single.index"), theta=52, phi = 18,
color="heat", se=2, ylab = "single-index", zlab = "Y",
main=expression(paste("Interaction surface g")))
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab