library(GlmSimulatoR)
library(ggplot2)
library(MASS)
# Do glm and lm estimate the same weights? Yes
# Fit the same simulated data twice -- once with lm() and once with a
# Gaussian glm() using the identity link -- and print both summaries so the
# coefficient tables can be compared side by side.
set.seed(1) # fixed seed so the simulated data can be reproduced
simdata <- simulate_gaussian()
ols_fit <- lm(Y ~ X1 + X2 + X3, data = simdata)
identity_glm_fit <- glm(
  Y ~ X1 + X2 + X3,
  data = simdata,
  family = gaussian(link = "identity")
)
summary(ols_fit)
summary(identity_glm_fit)
# Drop this section's objects so the next example starts clean.
rm(simdata, ols_fit, identity_glm_fit)
# If the link is not identity, will the response
# variable still be normal? Yes
# Simulate 1000 rows through a log link with two weights, then look at the
# distribution of Y in a 30-bin histogram.
set.seed(1)
simdata <- simulate_gaussian(N = 1000, link = "log", weights = c(0.1, 0.2))
ggplot(data = simdata, mapping = aes(x = Y)) +
  geom_histogram(bins = 30)
rm(simdata)
# Is AIC lower for the correct link? For ten thousand data points, depends
# on seed!
# The data are simulated through an inverse link; the same one-predictor
# model is then fitted with the inverse link (matching the simulation) and
# with the identity link (not matching), and the two AIC values are printed.
set.seed(1)
simdata <- simulate_gaussian(N = 10000, link = "inverse", weights = 1)
glm_correct_link <- glm(
  Y ~ X1,
  data = simdata,
  family = gaussian(link = "inverse")
)
glm_wrong_link <- glm(
  Y ~ X1,
  data = simdata,
  family = gaussian(link = "identity")
)
# AIC() reports the same criterion that summary() stores in its $aic field.
AIC(glm_correct_link)
AIC(glm_wrong_link)
rm(simdata, glm_correct_link, glm_wrong_link)
# Does a stepwise search find the correct model for logistic regression? Yes
# 3 related variables. 3 unrelated variables.
set.seed(1)
simdata <- simulate_binomial(
  N = 10000,
  link = "logit",
  weights = c(0.3, 0.4, 0.5),
  unrelated = 3
)
# Search space for stepAIC(): from the intercept-only model up to the model
# containing all six candidate columns.
search_scope <- list(
  lower = Y ~ 1,
  upper = Y ~ X1 + X2 + X3 + Unrelated1 + Unrelated2 + Unrelated3
)
# The search starts from the intercept-only logistic regression.
null_model <- glm(
  Y ~ 1,
  data = simdata,
  family = binomial(link = "logit")
)
selected_model <- stepAIC(null_model, scope = search_scope)
summary(selected_model)
rm(simdata, search_scope, null_model, selected_model)
# When the response is a gamma distribution, what does a scatter plot between
# X and Y look like?
# Simulate gamma data with a single weight and plot Y against X1.
set.seed(1)
simdata <- simulate_gamma(weights = 1)
ggplot(data = simdata, mapping = aes(x = X1, y = Y)) +
  geom_point()
rm(simdata)
# Run the code above in your browser using DataLab