## Generate some fake data
require(mvtnorm)
set.seed(12345)
N <- 2000
Df <- cbind(1, rmvnorm(N, mean=rep(0, 5)))
## Set coefficients
beta.a <- c(-2.00, 0.33, 0.66, 1.00)
beta.b <- c(0.00, 1.50, -0.25)
## Generate path probabilities following a normal model.
y.a <- as.vector(pnorm(tcrossprod(beta.a, Df[, 1:4])))
y.b <- as.vector(pnorm(tcrossprod(beta.b, Df[, c(1, 5, 6)])))
## AND and OR-model
or <- function(x, y) { x + y - x * y }
and <- function(x, y) { x * y }
y.star.OR <- or(y.a, y.b)
y.star.AND <- and(y.a, y.b)
## Observed responses
y.OR <- rbinom(N, 1, y.star.OR)
y.AND <- rbinom(N, 1, y.star.AND)
## Set up data.frame for estimation
Df <- cbind(1, Df)
Df <- as.data.frame(Df)
Df[,1] <- y.OR
Df[,2] <- y.AND
names(Df) <- c("y.OR", "y.AND", "x1", "x2", "x3", "x4", "x5")
## Before estimating, boolean models need to be specified using the
## boolprep function.
## OR model, specified to use a probit link for each causal path. This
## matches the data generating process above.
mod.OR <- boolprep(y.OR ~ (a | b), a ~ x1 + x2 + x3, b ~ x4 + x5,
data = Df, family=list(binomial("probit")))
## IF you really want to, it's also possible to specify a different
## link function for each causal path. These should be in the same
## order as specified in the model formula.
mod.OR2 <- boolprep(y.OR ~ (a | b), a ~ x1 + x2 + x3, b ~ x4 + x5,
data = Df, family=list(binomial("probit"),
binomial("logit")))
## Fit the prepared model using the nlminb optimizer (the default).
(fit.OR <- boolean(mod.OR, method="nlminb", control=list(trace=1)))
## Multiple optimizers can be specified in a single call to boolean.
## Here we fit with the nlm and nlminb optimizers.
(fit1.OR <- boolean(mod.OR, method=c("nlm", "nlminb")))
## Re-fit, with BFGS and a higher maximum number of iterations. All
## of the options that go along with nlm(), optim(), and genoud() should
## be transparently passable through boolean.
(fit2.OR <- boolean(mod.OR, method="BFGS", control = list(maxit = 500)))
## Induce a convergence warning message.
(fit3.OR <- boolean(mod.OR, method="BFGS", control = list(maxit = 5)))
## Not run:
# ## Now estimate model with genoud, a genetic optimizer. This has the
# ## capability of using multiple processors via parallel.
# (fit6.OR <- boolean(mod.OR, method="genoud",
# cluster=c("localhost", "localhost"),
# print.level=2))
#
# ## The default SANN optimizer is also available.
# (fit7.OR <- boolean(mod.OR, method="SANN"))
# ## End(Not run)
## The fit is stored as "model.fit", within the boolean object.
str(fit.OR$model.fit)
## Create a summary object, saving and printing it. Then take a look at
## the objects stored in the summary object.
(smry <- summary(fit.OR))
str(smry)
## Extract log-likelihood and coefficient vector.
logLik(fit.OR)
coef(fit.OR)
## Not run:
# ## Display the contours of the likelihood given a change the value of
# ## the coefficients. Despite the function name, these are not true
# ## profile likelihoods as they hold all other coefficients fixed at
# ## their MLE.
# (prof <- boolprof(fit.OR))
#
# ## Extract the plots for x1_a and x4_b.
# plot(prof, y = c("x1_a", "x4_b"))
# plot(prof, y = c(1, 3), scales = list(y = list(relation = "free")))
#
# ## You can also use variable or index matching with boolprof to select
# ## particular covariates of interest.
# boolprof(fit.OR, vars = c(1, 3))
# boolprof(fit.OR, vars = c("x1_a", "x4_b"))
#
# ## Plots of predicted probabilities are available through boolprob.
# ## With boolprob, either vars or newdata *must* be specified.
# boolprob(fit.OR, vars = c("x1_a", "x4_b"))
# boolprob(fit.OR, vars = c(2, 3, 4, 6))
#
# ## Specifying conf.int = TRUE produces simulated confidence intervals.
# ## The number of samples to pull from the distribution of the estimated
# ## coefficients is controlled by n; n=100 is default. This can take a
# ## while.
# (prob <- boolprob(fit.OR, vars = c(2, 3, 4, 6), n = 1000,
# conf.int = TRUE))
#
# ## Choose a different method estimate upon which to base the estimates.
# (prob <- boolprob(fit1.OR, method="nlm", vars=c(2, 3, 4, 6), n=1000,
# conf.int=TRUE))
#
# ## As with the other components of the model, you can extract the
# ## predicted probabilities.
# str(prob)
# prob$est
#
# ## Bootstrapping is also possible, and is the recommended method of
# ## making inferences. The boolbool function uses a simple sampling scheme:
# ## it resamples repeatedly from the provided data.frame. More the complex
# ## data structures with, for example, clustering, need to be dealt with
# ## manually.
# (bs <- boolboot(fit, n=10))
#
# ## boolboot supports bootstrapping across multiple processors.
# (bs <- boolboot(fit, n=100, cluster=c("localhost", "localhost")))
#
# ## End(Not run)
Run the code above in your browser using DataLab