# NOT RUN {
# Attach packages
library(cvms)
library(groupdata2) # partition()
library(dplyr) # %>% arrange()
# Use the participant.scores data set that ships with cvms
data <- participant.scores

# Fix the RNG seed so the partitioning below is reproducible
set.seed(7)

# Partition the data, keeping the result as a single data frame
# (list_out = FALSE), then sort the rows by the `.partitions` column.
# Alternatively, validate() can be fed separate train and test sets.
data_partitioned <- data %>%
  partition(
    p = 0.7,
    cat_col = "diagnosis",
    id_col = "participant",
    list_out = FALSE
  ) %>%
  arrange(.partitions)
# Validate a Gaussian model on the single partitioned data frame;
# `partitions_col` names the column that holds the partition labels
validate(
  train_data = data_partitioned,
  models = "score~diagnosis",
  partitions_col = ".partitions",
  family = "gaussian",
  REML = FALSE
)
# Validate a binomial model: diagnosis is the outcome, score the predictor
validate(
  train_data = data_partitioned,
  models = "diagnosis~score",
  partitions_col = ".partitions",
  family = "binomial"
)
# Validate a Gaussian model with a non-default link function ("log")
validate(
  train_data = data_partitioned,
  models = "score~diagnosis",
  partitions_col = ".partitions",
  family = "gaussian",
  link = "log",
  REML = FALSE
)
## Feed separate train and test sets

# Partition the data into a list of data frames (list_out = TRUE):
#   element 1 is the training set (70% of the data)
#   element 2 is the test set (30% of the data)
data_partitioned <- partition(
  data,
  p = 0.7,
  cat_col = "diagnosis",
  id_col = "participant",
  list_out = TRUE
)
train_data <- data_partitioned[[1]]
test_data <- data_partitioned[[2]]
# Validate a Gaussian model, passing the training and test sets
# as two separate data frames instead of using a partitions column
validate(
  train_data = train_data,
  test_data = test_data,
  models = "score~diagnosis",
  family = "gaussian",
  REML = FALSE
)
# }
# Run the code above in your browser using DataLab