# NOT RUN {
library(wsbackfit)
###############################################
# Gaussian Simulated Sample
###############################################
set.seed(123)
# Define the data generating process
n <- 1000
x1 <- runif(n)*4-2
x2 <- runif(n)*4-2
x3 <- runif(n)*4-2
x4 <- runif(n)*4-2
x5 <- as.numeric(runif(n)>0.6)
f1 <- 2*sin(2*x1)
f2 <- x2^2
f3 <- 0
f4 <- x4
f5 <- 1.5*x5
mu <- f1 + f2 + f3 + f4 + f5
err <- (0.5 + 0.5*x5)*rnorm(n)
y <- mu + err
df <- data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4, x5 = as.factor(x5), y = y)
# Fit the model with a fixed bandwidth for each covariate
m0 <- sback(formula = y ~ x5 + sb(x1, h = 0.1) + sb(x2, h = 0.1)
+ sb(x3, h = 0.1) + sb(x4, h = 0.1), kbin = 30, data = df)
summary(m0)
op <- par(no.readonly = TRUE)
par(mfrow = c(2,2))
plot(m0)
# Fit the model with bandwidths selectec using K-fold cross-validation
# }
# NOT RUN {
m0cv <- sback(formula = y ~ x5 + sb(x1) + sb(x2)
+ sb(x3) + sb(x4), kbin = 30, bw.grid = seq(0.01, 0.99, length = 30), KfoldCV = 5,
data = df)
summary(m0cv)
par(mfrow = c(2,2))
plot(m0cv)
# }
# NOT RUN {
# Estimate Variance as a function of x5 (which is binary)
resid <- y - m0$fitted.values
sig0 <- var(resid[x5 == 0])
sig1 <- var(resid[x5 == 1])
w <- x5/sig1 + (1-x5)/sig0
m1 <- sback(formula = y ~ x5 + sb(x1, h = 0.1) + sb(x2, h = 0.1)
+ sb(x3, h = 0.1) + sb(x4, h = 0.1), weights = w, kbin = 30, data = df)
summary(m1)
par(mfrow = c(2,2))
plot(m1)
###############################################
# Poisson Simulated Data
###############################################
set.seed(123)
# Define the data generating process
n <- 1000
x1 <- runif(n,-1,1)
x2 <- runif(n,-1,1)
eta <- 2 + 3*x1^2 + 5*x2^3
exposure <- round(runif(n, 50, 500))
y <- rpois(n, exposure*exp(eta))
df <- data.frame(y = y, x1 = x1, x2 = x2)
# Fit the model
m2 <- sback(formula = y ~ sb(x1, h = 0.1) + sb(x2, h = 0.1),
data = df, offset = log(exposure),
kbin = 30, family = "poisson")
summary(m2)
par(mfrow = c(1,2))
plot(m2)
# Dataframe and offset for prediction
n.p <- 100
newoffset <- rep(0, n.p)
df.pred <- data.frame(x1 = seq(-1, 1,l = n.p), x2 = seq(-1, 1,l = n.p))
m2p <- predict(m2, newdata = df.pred, newoffset = newoffset)
###############################################
# Postoperative Infection Data
###############################################
data(infect)
# Generalized varying coefficient model with binary response
m3 <- sback(formula = inf ~ sb(gluc, h = 10) + sb(gluc, by = linf, h = 10),
data = infect, family = "binomial", kbin = 15)
summary(m3)
# Plot both linear and non linear
# components of nonparametric functions: composed = FALSE
par(mfrow = c(1,3))
plot(m3, composed = FALSE)
# Personalized plots
# First obtain predictions in new data
# Create newdata for prediction
ngrid <- 30
gluc0 <- seq(50, 190, length = ngrid)
linf0 <- seq(0, 45, length = ngrid)
df <- expand.grid(gluc = gluc0, linf = linf0)
m3p <- predict(m3, newdata = df)
par(mfrow = c(1,2))
ii <- order(df[,"gluc"])
## Parametric coefficients
names(m3p$coeff)
# Nonlinear components
colnames(m3p$peffects)
# Include the linear component
plot(df[ii,"gluc"], m3p$coeff[["gluc"]]*df[ii,"gluc"] +
m3p$peffects[ii,"sb(gluc, h = 10)"],
type = 'l', xlab = "Glucose (mg/dl)", ylab = "f_1(gluc)",
main = "Nonparametric effect of Glucose")
# Include the linear component
plot(df[ii,"gluc"], m3p$coeff[["gluc:linf"]]*df[ii,"gluc"] +
m3p$peffects[ii,"sb(gluc, h = 10, by = linf)"],
type= 'l', xlab = "Glucose (mg/dl)", ylab = "f_2(gluc)",
main = "Varying coefficients as a function of Glucose")
# Countour plot of the probability of post-opererational infection
n <- sqrt(nrow(df))
Z <- matrix(m3p$pfitted.values, n, n)
filled.contour(z = Z, x = gluc0, y = linf0,
xlab = "Glucose (mg/dl)", ylab = "Lymphocytes (%)",
main = "Probability of post-opererational infection")
par(op)
# }
Run the code above in your browser using DataLab