library("dplyr")
# -- Data
# Split smocc_200 by row position: rows 1-1198 form the training part,
# rows 1199-1940 form the test part.
train <- smocc_200[seq_len(1198), ]
test <- smocc_200[seq(1199, 1940), ]
if (FALSE) {
  # -- Fit model
  # Two fits of the same model on the training data: a regular object and
  # a "light" object (light = TRUE).
  fit <- brokenstick(
    hgt_z ~ age | id,
    data = train,
    knots = 0:2,
    seed = 1
  )
  fit_light <- brokenstick(
    hgt_z ~ age | id,
    data = train,
    knots = 0:2,
    light = TRUE,
    seed = 1
  )

  # -- Predict, standard cases

  # No newdata: predictions are returned for the training data
  pred <- predict(fit)
  identical(nrow(pred), nrow(train))

  # No newdata is not possible for the light object
  predict(fit_light)

  # Predictions for the test data
  pred <- predict(fit, newdata = test)
  identical(nrow(pred), nrow(test))

  # Same prediction, but made from the light object and newdata
  pred_light <- predict(fit_light, newdata = test)
  identical(pred_light, pred)

  # -- Predict, special cases

  # -- Case 1: x, -y, -group

  # Case 1: x as "knots", standard estimates, train sample (n = 124)
  z <- predict(
    fit,
    x = "knots",
    shape = "wide"
  )
  head(z, 3)

  # Case 1: x as values, linearly interpolated, train sample (n = 124)
  z <- predict(
    fit,
    x = c(0.5, 1, 1.5),
    shape = "wide",
    include_data = FALSE
  )
  head(z, 3)

  # Case 1: x as values, linearly interpolated, test sample (n = 76)
  z <- predict(
    fit,
    test,
    x = c(0.5, 1, 1.5),
    shape = "wide",
    include_data = FALSE
  )
  head(z, 3)

  # Case 1: specifying only x is not possible for the light object
  z <- predict(fit_light, x = "knots")

  # -- Case 2: x, y, -group

  # Case 2: the supplied x and y form one new group with id = 0
  predict(
    fit,
    x = "knots",
    y = c(1, 1, 0.5, 0),
    shape = "wide"
  )

  # Case 2: also works for the light object
  predict(
    fit_light,
    x = "knots",
    y = c(1, 1, 0.5, 0),
    shape = "wide"
  )

  # -- Case 3: -x, -y, group

  # Case 3: predict at the observed ages for a subset of groups,
  # training sample
  pred <- predict(
    fit,
    group = c(10001, 10005, 10022)
  )
  head(pred, 3)

  # Case 3: this cannot be done for the light object
  pred_light <- predict(
    fit_light,
    group = c(10001, 10005, 10022)
  )

  # Case 3: another sample can be used. Note there is no child 999
  pred <- predict(
    fit,
    test,
    group = c(11045, 11120, 999)
  )
  tail(pred, 3)

  # Case 3: also works for the light object
  pred_light <- predict(
    fit_light,
    test,
    group = c(11045, 11120, 999)
  )
  identical(pred_light, pred)

  # -- Case 4: x, -y, group

  # Case 4: predict at specified x, only in selected groups, train sample
  pred <- predict(
    fit,
    x = c(0.5, 1, 1.25),
    group = c(10001, 10005, 10022),
    include_data = FALSE
  )
  pred

  # Case 4: same, but include the observed data and sort by id and age
  pred_all <- dplyr::arrange(
    predict(
      fit,
      x = c(0.5, 1, 1.25),
      group = c(10001, 10005, 10022)
    ),
    id,
    age
  )

  # Case 4: applies also to the test sample
  pred <- predict(
    fit,
    test,
    x = c(0.5, 1, 1.25),
    group = c(11045, 11120, 999),
    include_data = FALSE
  )
  pred

  # Case 4: also works with the light object
  pred_light <- predict(
    fit_light,
    test,
    x = c(0.5, 1, 1.25),
    group = c(11045, 11120, 999),
    include_data = FALSE
  )
  identical(pred, pred_light)

  # -- Case 5: x, y, group

  # Case 5: add new data to the training sample and refresh the broken
  # stick estimate at age x.
  # Note that the novel child 999 (not in train) has one data point
  predict(
    fit,
    x = c(0.9, 0.9, 0.9),
    y = c(1, 1, 1),
    group = c(10001, 10005, 999),
    include_data = FALSE
  )

  # Case 5: same, but now for the test sample. Novel child 899 has two
  # data points
  predict(
    fit,
    test,
    x = c(0.5, 0.9, 0.6, 0.9),
    y = c(0, 0.5, 0.5, 0.6),
    group = c(11045, 11120, 899, 899),
    include_data = FALSE
  )

  # Case 5: also works for the light object
  predict(
    fit_light,
    test,
    x = c(0.5, 0.9, 0.6, 0.9),
    y = c(0, 0.5, 0.5, 0.6),
    group = c(11045, 11120, 899, 899),
    include_data = FALSE
  )

  # -- Case 6: as Case 5, but without previous data

  # Case 6: same call as the last one, but now without newdata = test.
  # All children are de facto novel, as they do not occur in the training
  # or test samples.
  # Note: predictions for 11045 and 11120 differ from those in Case 5.
  predict(
    fit,
    x = c(0.5, 0.9, 0.6, 0.9),
    y = c(0, 0.5, 0.5, 0.6),
    group = c(11045, 11120, 899, 899)
  )

  # Case 6: this also works for the light brokenstick object
  predict(
    fit_light,
    x = c(0.5, 0.9, 0.6, 0.9),
    y = c(0, 0.5, 0.5, 0.6),
    group = c(11045, 11120, 899, 899)
  )
}
# Run the code above in your browser using DataLab