set.seed(30)
# continuous outcome
dat <- data.frame(y=rnorm(100), x1=runif(100), x2=runif(100), z=runif(100))
# Conditional linear slope
qgcomp.glm.noboot(y ~ z + x1 + x2, expnms = c('x1', 'x2'), data=dat, q=4, family=gaussian())
# Marginal linear slope (population average slope, for a purely linear,
# additive model this will equal the conditional)
qgcomp.glm.ee(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), data=dat, q=4,
family=gaussian())
qgcomp.glm.ee(f=y ~ x1 + x2 + I(x1*x2) + z, expnms = c('x1', 'x2'), data=dat, q=4,
family=gaussian())
# no intercept model
qgcomp.glm.ee(f=y ~ -1+z + x1 + x2, expnms = c('x1', 'x2'), data=dat, q=4,
family=gaussian())
# Note that these give different answers! In the first, the estimate is conditional on Z,
# but in the second, Z is marginalized over via standardization. The estimates
# can be made approximately the same by centering Z (for linear models), but
# the conditional estimate will typically have lower standard errors.
dat$z = dat$z - mean(dat$z)
# Conditional linear slope
qgcomp.glm.noboot(y ~ z + x1 + x2, expnms = c('x1', 'x2'), data=dat, q=4, family=gaussian())
# Marginal linear slope (population average slope, for a purely linear,
# additive model this will equal the conditional)
qgcomp.glm.ee(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), data=dat, q=4,
family=gaussian())
if (FALSE) {
# Population average mixture slope which accounts for non-linearity and interactions
# Note this is one use case for the estimating equations approach: replacing bootstrapping
qgcomp.glm.ee(y ~ z + x1 + x2 + I(x1^2) + I(x2*x1), family="gaussian",
expnms = c('x1', 'x2'), data=dat, q=4)
qgcomp.glm.boot(y ~ z + x1 + x2 + I(x1^2) + I(x2*x1), family="gaussian",
expnms = c('x1', 'x2'), data=dat, q=4, B=1000)
# generally non-linear/non-addiive underlying models lead to non-linear mixture slopes
dat$y = dat$y + dat$x1*dat$x2
qgcomp.glm.ee(y ~ z + x1 + x2 + I(x1^2) + I(x2*x1), family="gaussian",
expnms = c('x1', 'x2'), data=dat, q=4, deg=2)
qgcomp.glm.boot(y ~ z + x1 + x2 + I(x1^2) + I(x2*x1), family="gaussian",
expnms = c('x1', 'x2'), data=dat, q=4, deg=2, B=1000)
# binary outcome
dat <- data.frame(y=rbinom(50,1,0.5), x1=runif(50), x2=runif(50), z=runif(50))
# Conditional mixture OR
qgcomp.glm.noboot(y ~ z + x1 + x2, family="binomial", expnms = c('x1', 'x2'),
data=dat, q=2)
#Marginal mixture OR (population average OR - in general, this will not equal the
# conditional mixture OR due to non-collapsibility of the OR)
qgcomp.glm.boot(y ~ z + x1 + x2, family="binomial", expnms = c('x1', 'x2'),
data=dat, q=2, B=300, MCsize=5000, rr=FALSE)
qgcomp.glm.ee(y ~ z + x1 + x2, family="binomial", expnms = c('x1', 'x2'),
data=dat, q=2, rr=FALSE)
# Population average mixture RR
qgcomp.glm.boot(y ~ z + x1 + x2, family="binomial", expnms = c('x1', 'x2'),
data=dat, q=2, B=300, MCsize=5000, rr=TRUE)
qgcomp.glm.ee(y ~ z + x1 + x2, family="binomial", expnms = c('x1', 'x2'),
data=dat, q=2, rr=TRUE)
# getting the RR with the poisson trick
qgcomp.glm.ee(y ~ z + x1 + x2, family="poisson", expnms = c('x1', 'x2'),
data=dat, q=2)
qgcomp.glm.boot(y ~ z + x1 + x2, family="poisson", expnms = c('x1', 'x2'),
data=dat, q=2, B=300, MCsize=5000)
# Population average mixture RR, indicator variable representation of x2
# note that I(x==...) operates on the quantile-based category of x,
# rather than the raw value
res = qgcomp.glm.ee(y ~ z + x1 + I(x2==1) + I(x2==2) + I(x2==3),
family="binomial", expnms = c('x1', 'x2'), data=dat, q=4, rr=TRUE)
res$fit
plot(res)
# now add in a non-linear MSM
res2a = qgcomp.glm.boot(y ~ z + x1 + I(x2==1) + I(x2==2) + I(x2==3),
family="binomial", expnms = c('x1', 'x2'), data=dat, q=4, rr=TRUE,
degree=2)
res2 = qgcomp.glm.ee(y ~ z + x1 + I(x2==1) + I(x2==2) + I(x2==3),
family="binomial", expnms = c('x1', 'x2'), data=dat, q=4, rr=TRUE,
degree=2)
# conditional model estimates (identical point estimates and similar std. errors)
summary(res2a$fit)$coefficients
res2$fit
# msm estimates (identical point estimates and different std. errors)
summary(res2a$msmfit)$coefficients # correct point estimates, incorrect standard errors
res2 # correct point estimates, correct standard errors
plot(res2)
# Log risk ratio per one IQR change in all exposures (not on quantile basis)
dat$x1iqr <- dat$x1/with(dat, diff(quantile(x1, c(.25, .75))))
dat$x2iqr <- dat$x2/with(dat, diff(quantile(x2, c(.25, .75))))
# note that I(x>...) now operates on the untransformed value of x,
# rather than the quantized value
res2 = qgcomp.glm.ee(y ~ z + x1iqr + I(x2iqr>0.1) + I(x2>0.4) + I(x2>0.9),
family="binomial", expnms = c('x1iqr', 'x2iqr'), data=dat, q=NULL, rr=TRUE,
degree=2)
res2
# weighted model
N=5000
dat4 <- data.frame(id=seq_len(N), x1=runif(N), x2=runif(N), z=runif(N))
dat4$y <- with(dat4, rnorm(N, x1*z + z, 1))
dat4$w=runif(N) + dat4$z*5
qdata = quantize(dat4, expnms = c("x1", "x2"), q=4)$data
# first equivalent models with no covariates
qgcomp.glm.noboot(f=y ~ x1 + x2, expnms = c('x1', 'x2'), data=dat4, q=4, family=gaussian())
qgcomp.glm.noboot(f=y ~ x1 + x2, expnms = c('x1', 'x2'), data=dat4, q=4, family=gaussian(),
weights=w)
set.seed(13)
qgcomp.glm.ee(f=y ~ x1 + x2, expnms = c('x1', 'x2'), data=dat4, q=4, family=gaussian())
qgcomp.glm.ee(f=y ~ x1 + x2, expnms = c('x1', 'x2'), data=dat4, q=4, family=gaussian(),
weights=w)
# using the correct model
set.seed(13)
qgcomp.glm.ee(f=y ~ x1*z + x2, expnms = c('x1', 'x2'), data=dat4, q=4, family=gaussian(),
weights=w, id="id")
(qgcfit <- qgcomp.glm.ee(f=y ~ z + x1 + x2, expnms = c('x1', 'x2'), data=dat4, q=4,
family=gaussian(), weights=w))
qgcfit$fit
summary(glm(y ~ z + x1 + x2, data = qdata, weights=w))
}
Run the code above in your browser using DataLab