## Not run:
#
#
# ## handedness introductory example
# ############################################################
#
# data(handy)
#
# (out <- hierarchical(~ Gender + Handedness, data = handy))
#
# # hierarchical performs the same tasks as loglin and loglm,
# # but hierarchical gives the exact test p values and more statistics
# statsFit <- stats::loglin(handy, list(c(1),c(2)), fit = TRUE, param = TRUE)
# massFit <- MASS::loglm(~ Gender + Handedness, data = handy)
# # loglm is just a wrapper of loglin
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# # comparisons between hierarchical and loglin
# ############################################################
#
# # the expected table given the sufficient statistics can be computed
# # via two methods, iterative proportional fitting, and the mcmc itself:
# out$exp # ipf
# hierarchical(~ Gender + Handedness, data = handy, method = "mcmc")$exp
# statsFit$fit # the equivalent in loglin; this is used by default in hierarchical
#
#
#
#
# # the parameter values of the loglinear model can be accessed
# out$param
# statsFit$param
#
#
#
#
# # the p-value for the overall model is available as well
# # hierarchical gives the exact conditional p-value
# # (conditional on the sufficient statistics)
# # the five numbers correspond the probability of tables that are
# # "more weird" than the observed table, where "more weird" is determined
# # by having a larger X2 value (or G2, FT, CR, or NM)
# out$p.value
# fisher.test(handy)$p.value # out$p.value["X2"] is accurate to monte carlo error
#
#
# # loglin gives the p-values using the unconditional asymptotic distributions
# c(
# "X2" = pchisq(statsFit$pearson, df = statsFit$df, lower.tail = FALSE),
# "G2" = pchisq(statsFit$lrt, df = statsFit$df, lower.tail = FALSE)
# )
#
# out$mid.p.value # the mid (exact conditional) p-value is also available
#
#
#
#
# # the test statistics based on the observed table and the expected
# # table under the model are available
# out$statistic
# c(statsFit$pearson, statsFit$lrt) # loglin only gives X2 and G2
#
#
#
#
# # the markov basis used for the proposal distribution of the metropolis-hastings
# # algorithm are returned. the proposal distribution is uniform on +/-
# # the moves added to the current table
# out$moves
# # they are easier understood as tables
# vec2tab(out$moves, dim(handy))
# # notice that the marginals stay fixed:
# handy + vec2tab(out$moves, dim(handy))
#
#
#
#
# # these were computed as the markov basis of the integer matrix
# out$A
# markov(out$A)
# out$moves
#
#
#
#
# # the moves are also sometimes written in tableau form (LAS p.13)
# tableau(out$moves, dim(handy))
# # that's +1 the the table in elements [1,1] and [2,2]
# # and -1 in the table in elements [1,2] and [2,1]
#
#
#
#
# # the acceptance probability of the MCMC is retained
# out$acceptProb
#
#
#
#
# # various model assessment measures are also available
# out$quality
#
#
#
#
# # the number of independent parameters per term are in df
# out$df
#
#
#
#
# # as an added help, you may find the visuals in vcd useful:
# # library(vcd)
# # mosaic(~ Gender + Handedness, data = handy, shade = TRUE, legend = TRUE)
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# ## politics example - with computing the exact p value by hand
# ############################################################
#
# data(politics)
#
# (out <- hierarchical(~ Personality + Party, data = politics))
# statsFit <- stats::loglin(politics, as.list(1:2), fit = TRUE, param = TRUE)
#
# out$p.value
# # exact without monte-carlo error
# sum(dhyper(c(0:3,6:9), 10, 10, 9))
# fisher.test(politics)$p.value
# round(dhyper(0:9, 10, 10, 9), 4)
#
#
# # comparisons :
# out$exp
# statsFit$fit
#
# out$param
# statsFit$param
#
# out$p.value # exact
# c(
# "X2" = pchisq(statsFit$pearson, df = statsFit$df, lower.tail = FALSE),
# "G2" = pchisq(statsFit$lrt, df = statsFit$df, lower.tail = FALSE)
# ) # asymptotic approximation
# fisher.test(politics)$p.value # accurate to monte carlo error
#
# out$statistic # accurate to monte carlo error
# c(statsFit$pearson, statsFit$lrt)
#
# # mosaic(~ Personality + Party, data = politics, shade = TRUE, legend = TRUE)
#
#
#
#
#
#
#
#
#
#
#
# ## eyeHairColor from the Diaconis and Sturmfels reference
# ############################################################
#
# data(HairEyeColor)
# eyeHairColor <- margin.table(HairEyeColor, 2:1)
#
# outC <- hierarchical(~ Eye + Hair, data = eyeHairColor)
# outR <- hierarchical(~ Eye + Hair, data = eyeHairColor, engine = "R")
#
# # doesn't work even with workspace = 2E9 (with over 4.5Gb in memory)
# #fisher.test(eyeHairColor, hybrid = TRUE, workspace = 2E9)
#
# tableau(outC$moves, dim(eyeHairColor))
#
#
# # library(microbenchmark)
# # microbenchmark(
# # hierarchical(~ Eye + Hair, data = eyeHairColor),
# # hierarchical(~ Eye + Hair, data = eyeHairColor, engine = "R")
# # )
# # 5-10 times faster; much faster with increased iter
#
#
# # mosaic(~ Eye + Hair, data = HairEyeColor, shade = TRUE, legend = TRUE)
#
#
#
#
#
#
# ## abortion preference example from the
# ## Diaconis and Sturmfels reference pp. 379--381
# ## a no 3-way interaction model
# ############################################################
#
# data(abortion)
#
# out <- hierarchical(
# ~ Education*Abortion + Abortion*Denomination + Education*Denomination,
# data = abortion,
# iter = 10000, burn = 50000, thin = 50
# )
# out$p.value
#
#
# vec2tab(rowMeans(out$steps), dim(abortion)) # cf. p. 380
# loglin(abortion, subsets(1:3, 2), fit = TRUE)$fit
#
#
#
# out$param
# loglin(abortion, subsets(1:3, 2), param = TRUE)$param
#
#
#
# qqplot(rchisq(1055, df = 8), out$sampsStats$X2s)
# curve(1*x, from = 0, to = 30, add = TRUE, col = "red")
#
# ( nMoves <- 2*ncol(out$moves) ) # DS uses 110
# # the markov basis is larger than it needs to be
#
#
#
#
#
#
#
#
#
#
# ## loglin no three-way interaction model example
# ############################################################
#
# # the help for fits the no three-way interaction model on HairEyeColor,
# # finds a .66196 p-value using the asymptotic distribution, and concludes
# # a good fit:
# data(HairEyeColor)
#
# fit <- loglin(HairEyeColor, subsets(1:3, 2), fit = TRUE, param = TRUE)
# mod <- hierarchical(~ Eye*Hair + Hair*Sex + Eye*Sex, data = HairEyeColor)
#
#
#
#
# # p values
# pchisq(fit$lrt, fit$df, lower.tail = FALSE) # see ?loglin
# mod$p.value
#
# # test statistics
# c(fit$pearson, fit$lrt)
# mod$statistic
#
# # fits (estimated tables)
# fit$fit
# mod$exp
# mod$obs
#
#
# # checking the autocorrelation
# acf(mod$sampsStats$PRs)
# mod <- hierarchical(~ Eye*Hair + Hair*Sex + Eye*Sex, data = HairEyeColor, thin = 100)
# acf(mod$sampsStats$PRs) # got it!
#
#
# # the slight differences in fit$fit and mod$exp (both done with ipf from loglin)
# # are due to differences in variable order:
# loglin(HairEyeColor, subsets(1:3, 2), fit = TRUE)$fit
# loglin(HairEyeColor, subsets(1:3, 2)[c(1,3,2)], fit = TRUE)$fit
#
# # a few model moves
# vec2tab(mod$moves[,1], dim(HairEyeColor))
# vec2tab(mod$moves[,50], dim(HairEyeColor))
# -vec2tab(mod$moves[,50], dim(HairEyeColor))
#
# # they contribute 0 to the marginals of the table
# vec2tab(mod$moves[,50], dim(HairEyeColor))
# mod$A %*% mod$move[,50]
# vec2tab(mod$A %*% mod$move[,50], dim(HairEyeColor))
#
# HairEyeColor
# HairEyeColor + vec2tab(mod$moves[,50], dim(HairEyeColor))
#
#
#
#
#
#
#
#
# ## a table with positive marginals but no MLE for
# ## the no-three way interaction model
# ############################################################
#
#
# data(haberman)
#
# mod <- hierarchical(~ X1*X2 + X2*X3 + X1*X3, data = haberman)
#
# statsFit <- loglin(haberman, subsets(1:3, 2), param = TRUE, fit = TRUE)
# statsFit$fit
# statsFit$param
# c(statsFit$pearson, statsFit$lrt)
#
# algstatFit <- hierarchical(~ X1*X2 + X2*X3 + X1*X3, data = haberman, method = "mcmc")
# algstatFit$exp
# algstatFit$param
# algstatFit$statistic
#
#
#
#
#
#
#
#
#
#
#
#
# ## an example from agresti, p.322
# ############################################################
#
# data(drugs)
# ftable(aperm(drugs, c(3, 1, 2))) # = table 8.3
#
# out <- hierarchical(~Alcohol + Cigarette + Marijuana, data = drugs)
# matrix(round(aperm(out$exp, c(2,1,3)), 1), byrow = FALSE)
#
# loglin(drugs, as.list(1:3), fit = TRUE)$fit
# loglin(drugs, as.list(1:3), param = TRUE)$param
#
# # # the saturated model issues a warning from markov, but works :
# # out <- hierarchical(~Alcohol * Cigarette * Marijuana, data = drugs)
# # matrix(round(aperm(out$exp, c(2,1,3)), 1), byrow = FALSE)
#
#
# ftable(aperm(out$exp, c(3,1,2)))
#
# stats <- loglin(drugs, as.list(1:3), fit = TRUE, param = TRUE)
#
#
# # considered via glm
#
# df <- as.data.frame(drugs)
# mod <- glm(Freq ~ Alcohol + Cigarette + Marijuana, data = df, family = poisson)
# summary(mod)
# mod$fitted.values
#
#
# # the same can be done with glm :
#
# mod <- glm(
# Freq ~ Alcohol + Cigarette + Marijuana,
# data = as.data.frame(drugs), family = poisson
# )
# summary(mod)
# matrix(round(mod$fitted.values[c(1,3,2,4,5,7,6,8)],1))
#
#
#
# mod <- glm(
# Freq ~ Alcohol * Cigarette + Marijuana,
# data = as.data.frame(drugs), family = poisson
# )
# summary(mod)
# matrix(round(mod$fitted.values[c(1,3,2,4,5,7,6,8)],1))
#
#
# mod <- glm(
# Freq ~ Alcohol * Cigarette * Marijuana,
# data = as.data.frame(drugs), family = poisson
# )
# summary(mod)
# matrix(round(mod$fitted.values[c(1,3,2,4,5,7,6,8)],1))
#
#
#
#
#
#
#
#
#
#
#
#
#
# ## End(Not run)
Run the code above in your browser using DataLab