# NOT RUN {
# A collection of basic numerical examples is available online:
# https://homepage.divms.uiowa.edu/~jblang/mph.fitting/mph.basic.numerical.examples.htm
# Another collection of less basic examples is also online:
# https://homepage.divms.uiowa.edu/~jblang/mph.fitting/mph.numerical.examples.htm
# EXAMPLE 1. Test whether a binomial probability equals 0.5.
#
# y = (15, 22) <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# In other symbols,
#
# y = (15, 22) <- Y = (Y[1], Y[2]) ~ multinomial(37, p = (p[1], p[2])).
#
# GOAL: Test H0: p[1] = 0.5 vs. H1: not H0.
a1 <- mph.fit(y = c(15, 22), constraint = function(p) {p[1] - 0.5})
# Alternative specifications...
a2 <- mph.fit(y = c(15, 22), constraint = function(p) {p[1] - p[2]})
a3 <- mph.fit(y = c(15, 22), constraint = function(p) {log(p[1] / p[2])})
a4 <- mph.fit(y = c(15, 22), constraint = function(m) {m[1] - m[2]},
h.mean = TRUE)
a5 <- mph.fit(y = c(15, 22), link = function(p) {p}, X = matrix(1, 2, 1))
a6 <- mph.fit(y = c(15, 22), link = "logm", X = matrix(1, 2, 1))
# Alternatively, assume that
#
# y = (15, 22) <- Y ~ MP(gamma, p | strata = 1, fixed = "none");
# i.e. Y ~ indep Poisson.
#
# In other symbols,
#
# y = (15, 22) <- Y = (Y[1], Y[2]), where
# Y[i] indep ~ Poisson(gamma * p[i]), i = 1, 2.
#
# GOAL: Test H0: p[1] = 0.5 vs. H1: not H0.
b1 <- mph.fit(y = c(15, 22), constraint = function(p) {p[1] - 0.5},
fixed.strata = "none")
mph.summary(a1, TRUE)
mph.summary(b1, TRUE)
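# As a rough cross-check (not part of the original example), the same null
# hypothesis H0: p[1] = 0.5 can be examined with the exact binomial test in
# base R; its p-value should be broadly comparable to the test statistics
# reported by mph.summary(a1) and mph.summary(b1).
binom.test(x = 15, n = 37, p = 0.5)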
# EXAMPLE 2. Test whether a multinomial probability vector is uniform.
# Test whether a multinomial probability vector equals a
# specific value.
#
# y <- Y = (Y[1], ..., Y[6]) ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# In other symbols,
#
# y <- Y ~ multinomial(15, p = (p[1], ..., p[6]))
#
# GOAL: Test H0: p[1] = p[2] = ... = p[6] vs. H1: not H0.
y <- rmultinom(1, 15, rep(1, 6))
a1 <- mph.fit(y, L.fct = function(p) {p}, X = matrix(1, 6, 1),
y.eps = 0.1)
# Alternative specification...
a2 <- mph.fit(y, h.fct = function(p) {as.matrix(p[-6] - 1/6)},
y.eps = 0.1)
mph.summary(a1, TRUE)
mph.summary(a2, TRUE)
# Test whether p = (1, 2, 3, 1, 2, 3) / 12.
p0 <- c(1, 2, 3, 1, 2, 3) / 12
b <- mph.fit(y, h.fct = function(p) {as.matrix(p[-6] - p0[-6])},
y.eps = 0.1)
mph.summary(b, TRUE)
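# As a hedged cross-check (not part of the original example), both null
# hypotheses can also be examined with Pearson's chi-squared goodness-of-fit
# test; with only 15 observations the asymptotic p-values are rough at best.
chisq.test(y, p = rep(1/6, 6))   # H0: uniform
chisq.test(y, p = p0)            # H0: p = (1, 2, 3, 1, 2, 3) / 12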
# EXAMPLE 3. Test whether a multinomial probability vector satisfies a
# particular constraint.
#
# Data Source: Agresti 25:2002.
#
# y = (30, 63, 63) <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# In other symbols,
#
# y = (30, 63, 63) <- Y ~ multinomial(156, p = (p[1], p[2], p[3]))
#
# GOAL: Test H0: p[1] + p[2] = p[1] / (p[1] + p[2]) vs. H1: not H0.
y <- c(30, 63, 63)
h.fct <- function(p) {
(p[1] + p[2]) - p[1] / (p[1] + p[2])
}
a <- mph.fit(y, h.fct = h.fct)
mph.summary(a, TRUE)
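# A quick sanity check: the constraint function should be (numerically) zero
# at the fitted probabilities stored in the 'p' component of the fit, as used
# elsewhere in these examples.
p.hat <- a$p
(p.hat[1] + p.hat[2]) - p.hat[1] / (p.hat[1] + p.hat[2])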
# EXAMPLE 4. Test of Independence in a 2-by-2 Table.
#
# y = (y[1, 1], y[1, 2], y[2, 1], y[2, 2]) = (25, 18, 13, 21)
# <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# In other symbols,
# y = (y[1, 1], y[1, 2], y[2, 1], y[2, 2])
# <- Y ~ multinomial(77, p = (p[1, 1], p[1, 2], p[2, 1], p[2, 2]))
#
# GOAL: Test H0: p[1, 1] * p[2, 2] / p[1, 2] / p[2, 1] = 1
# vs. H1: not H0.
d <- data.frame(A = c(1, 1, 2, 2), B = c(1, 2, 1, 2),
count = c(25, 18, 13, 21))
a1 <- mph.fit(y = d$count, h.fct = function(p)
{log(p[1] * p[4] / p[2] / p[3])})
# Alternative specifications of independence....
a2 <- mph.fit(y = d$count, h.fct = function(p)
{p <- matrix(p, 2, 2, byrow = TRUE);
log(p[1, 1] * p[2, 2] / p[1, 2] / p[2, 1])})
a3 <- mph.fit(y = d$count, h.fct = function(p)
{p[1] * p[4] / p[2] / p[3] - 1})
a4 <- mph.fit(y = d$count, h.fct = function(p)
{p[1] / (p[1] + p[2]) - p[3] / (p[3] + p[4])})
a5 <- mph.fit(y = d$count, L.fct = "logm",
X = model.matrix(~ A + B, data = d))
# Suppose we wished to output observed and fitted values of
# log OR, OR, and P(B = 1 | A = 1) - P(B = 1 | A = 2)...
L.fct <- function(p) {
L <- as.matrix(c(
log(p[1] * p[4] / p[2] / p[3]),
p[1] * p[4] / p[2] / p[3],
p[1] / (p[1] + p[2]) - p[3] / (p[3] + p[4])
))
rownames(L) <- c("log OR", "OR",
"P(B = 1 | A = 1) - P(B = 1 | A = 2)")
L
}
a6 <- mph.fit(y = d$count, h.fct = function(p)
{log(p[1] * p[4] / p[2] / p[3])},
L.fct = L.fct, X = diag(3))
# Unrestricted Model...
b <- mph.fit(y = d$count, L.fct = L.fct, X = diag(3))
mph.summary(a6, TRUE)
mph.summary(b, TRUE)
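# As a hedged cross-check (not part of the original example), the independence
# model can also be fit as a Poisson loglinear model with glm(); its fitted
# means should reproduce a5$m, since loglinear models containing an intercept
# give the same fitted means under multinomial and Poisson sampling.
a7 <- glm(count ~ factor(A) + factor(B), family = poisson, data = d)
cbind(a5$m, fitted(a7))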
# EXAMPLE 5. Test of Independence in a 4-by-4 Table.
# (Using Log-Linear Model.)
#
# Data Source: Table 2.8, Agresti, 57:2002.
#
# y <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# In other symbols,
# y <- Y ~ multinomial(96, p = (p[1, 1], p[1, 2], ..., p[4, 4]))
#
# GOAL: Test H0: Income and JobSatisf are independent, i.e.
# p[i, j] = p[i, +] * p[+, j] for all i, j, vs. H1: not H0.
d <- data.frame(Income = c("<15", "<15", "<15", "<15", "15-25", "15-25",
"15-25", "15-25", "25-40", "25-40", "25-40",
"25-40", ">40", ">40", ">40", ">40"),
JobSatisf = c("VD", "LD", "MS", "VS", "VD", "LD", "MS", "VS",
"VD", "LD", "MS", "VS", "VD", "LD", "MS", "VS"),
count = c(1, 3, 10, 6, 2, 3, 10, 7, 1, 6, 14, 12, 0, 1, 9, 11))
a <- mph.fit(y = d$count, link = "logp",
X = model.matrix(~ Income + JobSatisf, data = d))
mph.summary(a)
# Alternatively,
b <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ Income + JobSatisf, data = d))
mph.summary(b)
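# As a hedged cross-check (not part of the original example), the same
# independence model can be fit as a Poisson loglinear model with glm();
# its residual deviance should agree with the likelihood-ratio statistic
# reported by mph.summary(a) and mph.summary(b).
b.glm <- glm(count ~ Income + JobSatisf, family = poisson, data = d)
deviance(b.glm)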
# EXAMPLE 6. Test Marginal Homogeneity in a 3-by-3 Table.
#
# Data Source: Table 10.16, Agresti, 445:2002.
#
# y <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# Specifically,
# y <- Y ~ multinomial(160, p = (p[1, 1], ..., p[3, 3]))
#
# GOAL: Test H0: p[1, +] = p[+, 1], p[2, +] = p[+, 2], p[3, +] = p[+, 3]
# vs. H1: not H0.
d <- data.frame(Siskel = c("Pro", "Pro", "Pro", "Mixed", "Mixed",
"Mixed", "Con", "Con", "Con"),
Ebert = c("Pro", "Mixed", "Con", "Pro", "Mixed",
"Con", "Pro", "Mixed", "Con"),
count = c(64, 9, 10, 11, 13, 8, 13, 8, 24))
h.fct <- function(p){
p.Siskel <- M.fct(d$Siskel) %*% p
p.Ebert <- M.fct(d$Ebert) %*% p
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}
a1 <- mph.fit(y = d$count, h.fct = h.fct)
mph.summary(a1, TRUE)
# Suppose that we wish to report on the observed and fitted
# marginal probabilities.
L.fct <- function(p) {
p.Siskel <- M.fct(d$Siskel) %*% p
p.Ebert <- M.fct(d$Ebert) %*% p
L <- as.matrix(c(p.Siskel, p.Ebert))
rownames(L) <- c(paste(sep = "", "P(Siskel=", levels(as.factor(d$Siskel)), ")"),
paste(sep = "", "P(Ebert=", levels(as.factor(d$Ebert)), ")"))
L
}
a2 <- mph.fit(y = d$count, h.fct = h.fct, L.fct = L.fct, X = diag(6))
mph.summary(a2, TRUE)
# M.fct(factor) %*% p gives the marginal probabilities corresponding to
# the levels of 'factor'. The marginal probabilities are ordered by the
# levels of 'factor'.
#
# Alternatively, in this rectangular table setting, we can find the
# marginal probabilities using the apply(...) function. In this case,
# the marginal probabilities are ordered as they are entered in the
# data set.
h.fct <- function(p) {
p <- matrix(p, 3, 3, byrow = TRUE)
p.Siskel <- apply(p, 1, sum)
p.Ebert <- apply(p, 2, sum)
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}
L.fct <- function(p) {
p <- matrix(p, 3, 3, byrow = TRUE)
p.Siskel <- apply(p, 1, sum)
p.Ebert <- apply(p, 2, sum)
L <- as.matrix(c(p.Siskel, p.Ebert))
rownames(L) <- c("P(Siskel=Pro)", "P(Siskel=Mixed)",
"P(Siskel=Con)", "P(Ebert=Pro)",
"P(Ebert=Mixed)", "P(Ebert=Con)")
L
}
b <- mph.fit(y = d$count, h.fct = h.fct, L.fct = L.fct, X = diag(6))
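# As a quick descriptive check (not part of the original example), the
# observed marginal proportions can be computed directly and compared with
# the observed and fitted marginals reported by mph.summary(a2).
p.obs <- d$count / sum(d$count)
rbind(Siskel = tapply(p.obs, d$Siskel, sum),
      Ebert  = tapply(p.obs, d$Ebert, sum))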
# EXAMPLE 7. Log-Linear Model for 2-by-2-by-2 Table.
#
# Data Source: Table 8.16, Agresti 347:2002
#
# y <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# Specifically,
#
# y <- Y ~ multinomial(621, p).
#
# The counts in y are cross-classification counts for variables
# G = Gender, I = Information Opinion, H = Health Opinion.
#
# GOAL: Fit the loglinear models [GI, GH, IH] and [G, IH].
d <- data.frame(G = c("Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female"),
I = c("Support", "Support", "Oppose", "Oppose",
"Support", "Support", "Oppose", "Oppose"),
H = c("Support", "Oppose", "Support", "Oppose",
"Support", "Oppose", "Support", "Oppose"),
count = c(76, 160, 6, 25, 114, 181, 11, 48))
# Fit loglinear model [GI, GH, IH]...
a1 <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ G + I + H + G:I + G:H + I:H, data = d))
# Fit loglinear model [G, IH]...
a2 <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ G + I + H + I:H, data = d))
# Different Sampling Distribution Assumptions:
#
# Alternatively, assume
# y <- Y ~ MP(gamma, p | strata = 1, fixed = "none");
# that is, Y ~ indep Poisson.
#
# In other symbols,
# y <- Y, where Y[i] indep ~ Poisson(m[i] = gamma * p[i]).
# Here, gamma is the unknown expected sample size.
b2 <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ G + I + H + I:H, data = d),
fixed = "none")
# Alternatively, assume
# y <- Y ~ MP(gamma, p | strata = Gender, fixed = "all");
# that is, Y ~ prod multinomial.
#
# In other symbols,
# y <- Y = (Y[1, 1, 1], Y[1, 1, 2], ..., Y[2, 2, 2]),
# where (Y[i, 1, 1], ..., Y[i, 2, 2]) indep ~ multinomial(n[i], p[i, , ]).
# Here, p[i, j, k] = P(I = j, H = k | G = i) and n[1] = 267 and
# n[2] = 354 are the a priori fixed sample sizes for males and females.
c2 <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ G + I + H + I:H, data = d),
strata = d$G)
# Alternatively, assume
# y <- Y ~ MP(gamma, p | strata = Gender, fixed = "none");
# that is, Y ~ prod Poisson.
#
# In other symbols,
# y <- Y = (Y[1, 1, 1], Y[1, 1, 2], ..., Y[2, 2, 2]),
# where Y[i, j, k] indep ~ Poisson(m[i, j, k] = gamma[i] * p[i, j, k]).
# Here, p[i, j, k] = P(I = j, H = k | G = i) and gamma[1] and gamma[2] are the
# unknown expected sample sizes for males and for females.
d2 <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ G + I + H + I:H, data = d),
strata = d$G, fixed.strata = "none")
cbind(a2$m, b2$m, c2$m, d2$m, sqrt(diag(a2$covm)), sqrt(diag(b2$covm)),
sqrt(diag(c2$covm)), sqrt(diag(d2$covm)))
cbind(a2$p, b2$p, c2$p, d2$p, sqrt(diag(a2$covp)), sqrt(diag(b2$covp)),
sqrt(diag(c2$covp)), sqrt(diag(d2$covp)))
# EXAMPLE 8. Fit Linear-by-Linear Log-Linear Model
#
# Data Source: Table 8.15, Agresti, 345:2002
#
# y <- Y ~ MP(gamma, p | strata = 1, fixed = "all");
# i.e. Y ~ multinomial.
#
# Specifically,
# y <- Y ~ multinomial(1425, p)
#
# GOAL: Assess the fit of the linear-by-linear log-linear model.
d <- list(Schooling = c("<HS", "<HS", "<HS", "HS", "HS", "HS", ">HS", ">HS", ">HS"),
Abortion = c("Disapprove", "Middle", "Approve", "Disapprove", "Middle",
"Approve", "Disapprove", "Middle", "Approve"),
count = c(209, 101, 237, 151, 126, 426, 16, 21, 138))
Schooling.score <- -1 * (d$Schooling == "<HS") +
0 * (d$Schooling == "HS") +
1 * (d$Schooling == ">HS")
Abortion.score <- -1 * (d$Abortion == "Disapprove") +
0 * (d$Abortion == "Middle") +
1 * (d$Abortion == "Approve")
d <- data.frame(d, Schooling.score, Abortion.score)
a <- mph.fit(y = d$count, link = "logm",
X = model.matrix(~ Schooling + Abortion +
Schooling.score : Abortion.score, data = d))
mph.summary(a, TRUE)
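# As a hedged cross-check (not part of the original example), the
# linear-by-linear association model can also be fit with glm(); its residual
# deviance should agree with the likelihood-ratio statistic reported by
# mph.summary(a).
a.glm <- glm(count ~ Schooling + Abortion + Schooling.score:Abortion.score,
             family = poisson, data = d)
deviance(a.glm)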
# EXAMPLE 9. Marginal Standardization of a Contingency Table.
#
# Data Source: Table 8.15, Agresti 345:2002.
#
# GOAL: For a two-way table, find the standardized values of y, say y*,
# that satisfy (i) y* has the same odds ratios as y, and
# (ii) y* has row and column totals equal to 100.
#
# Note: This is equivalent to the problem of finding the fitted values
# for the following model...
# x <- Y ~ multinomial(n, p = (p[1, 1], ..., p[3, 3]))
# p[1, +] = p[2, +] = p[3, +] = p[+, 1] = p[+, 2] = p[+, 3] = 1/3
# p[1, 1] * p[2, 2] / p[2, 1] / p[1, 2] = or[1, 1]
# p[1, 2] * p[2, 3] / p[2, 2] / p[1, 3] = or[1, 2]
# p[2, 1] * p[3, 2] / p[3, 1] / p[2, 2] = or[2, 1]
# p[2, 2] * p[3, 3] / p[3, 2] / p[2, 3] = or[2, 2],
# where or[i, j] = y[i, j] * y[i + 1, j + 1] / y[i + 1, j] / y[i, j + 1]
# are the observed (y) odds ratios.
# If m is the vector of fitted values, then y* = m * 300 / sum(m)
# are the standardized values of y.
# Here x can be any vector of 9 counts.
# Choosing x so that the sum is 300 leads to sum(m) = 300, so that
# y* = m in this case.
d <- data.frame(Schooling = c("<HS", "<HS", "<HS", "HS", "HS", "HS", ">HS", ">HS", ">HS"),
Abortion = c("Disapprove", "Middle", "Approve", "Disapprove", "Middle",
"Approve", "Disapprove", "Middle", "Approve"),
count = c(209, 101, 237, 151, 126, 426, 16, 21, 138))
h.fct <- function(p) {
p.Schooling <- M.fct(d$Schooling) %*% p
p.Abortion <- M.fct(d$Abortion) %*% p
p <- matrix(p, 3, 3, byrow = TRUE)
as.matrix(c(
p.Schooling[-3] - 1/3, p.Abortion[-3] - 1/3,
p[1, 1] * p[2, 2] / p[2, 1] / p[1, 2] - 209 * 126 / 151 / 101,
p[1, 2] * p[2, 3] / p[2, 2] / p[1, 3] - 101 * 426 / 126 / 237,
p[2, 1] * p[3, 2] / p[3, 1] / p[2, 2] - 151 * 21 / 16 / 126,
p[2, 2] * p[3, 3] / p[3, 2] / p[2, 3] - 126 * 138 / 21 / 426
))
}
b <- mph.fit(y = d$count, h.fct = h.fct)
ystar <- b$m * 300 / sum(b$m)
matrix(round(ystar, 1), 3, 3, byrow = TRUE)
x <- c(rep(33, 8), 36)
b <- mph.fit(y = x, h.fct = h.fct)
ystar <- b$m
matrix(round(ystar, 1), 3, 3, byrow = TRUE)
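# As a hedged alternative (an illustrative sketch, not part of the original
# example), the standardized table can also be obtained by iterative
# proportional fitting: repeatedly rescaling rows and columns of y toward
# target totals of 100 preserves all odds ratios.
ipf <- matrix(d$count, 3, 3, byrow = TRUE)
for (iter in 1:100) {
  ipf <- ipf * (100 / rowSums(ipf))        # scale rows to total 100
  ipf <- t(t(ipf) * (100 / colSums(ipf)))  # scale columns to total 100
}
round(ipf, 1)   # should agree with round(matrix(ystar, 3, 3, byrow = TRUE), 1)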
# EXAMPLE 10. Cumulative Logit Model.
#
# Data Source: Table 7.19, Agresti, 306:2002.
#
# y <- Y ~ MP(gamma, p | strata = Therapy * Gender, fixed = "all");
# i.e. Y ~ prod multinomial.
#
# Here, y[i, j, k] is the cross-classification count corresponding to
# Therapy = i, Gender = j, Response = k.
#
# The table probabilities are defined as
# p[i, j, k] = P(Response = k | Therapy = i, Gender = j).
#
# GOAL: Fit the cumulative logit proportional odds model that includes
# the main effects of Therapy and Gender.
d <- data.frame(Therapy = c("Sequential", "Sequential", "Sequential", "Sequential",
"Sequential", "Sequential", "Sequential", "Sequential",
"Alternating", "Alternating", "Alternating", "Alternating",
"Alternating", "Alternating", "Alternating", "Alternating"),
Gender = c("Male", "Male", "Male", "Male", "Female", "Female",
"Female", "Female", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female"),
Response = c("Progressive", "NoChange", "Partial", "Complete",
"Progressive", "NoChange", "Partial", "Complete",
"Progressive", "NoChange", "Partial", "Complete",
"Progressive", "NoChange", "Partial", "Complete"),
count = c(28, 45, 29, 26, 4, 12, 5, 2, 41, 44, 20, 20, 12, 7, 3, 1))
strata <- paste(sep = "", d$Therapy, ".", d$Gender)
d <- data.frame(d, strata)
d3 <- subset(d, Response != "Complete")
# Drop the unused "Complete" level so that model.matrix() does not create
# a redundant (all-zero) column for it.
d3$Response <- factor(d3$Response, levels = c("NoChange", "Partial", "Progressive"))
L.fct <- function(p) {
p <- matrix(p, 4, 4, byrow = TRUE)
clogit <- c()
for (s in 1:4) {
clogit <- c(clogit,
log(sum(p[s, 1]) / sum(p[s, 2:4])),
log(sum(p[s, 1:2]) / sum(p[s, 3:4])),
log(sum(p[s, 1:3]) / sum(p[s, 4]))
)
}
L <- as.matrix(clogit)
rownames(L) <- c(paste(sep = "", "log odds(R < ", 2:4, "|",
d3$strata, ")"))
L
}
a <- mph.fit(d$count, link = L.fct,
X = model.matrix(~ -1 + Response + Therapy + Gender,
data = d3),
strata = strata)
# Fit the related non-proportional odds cumulative logit model
b <- mph.fit(d$count, link = L.fct,
X = model.matrix(~ Response + Response * Therapy +
Response * Gender - 1 - Therapy - Gender,
data = d3),
strata = strata)
mph.summary(a, TRUE)
mph.summary(b, TRUE)
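# As a hedged cross-check (an assumption, not part of the original example),
# the proportional odds model can also be fit with MASS::polr(); the Therapy
# and Gender coefficients should be comparable in magnitude to those in
# mph.summary(a), although polr() uses a different sign and cut-point
# parameterization.
if (requireNamespace("MASS", quietly = TRUE)) {
  d$Response.ord <- factor(d$Response,
                           levels = c("Progressive", "NoChange",
                                      "Partial", "Complete"),
                           ordered = TRUE)
  summary(MASS::polr(Response.ord ~ Therapy + Gender, weights = count,
                     data = d, Hess = TRUE))
}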
# }