if (FALSE) {
# Everything inside this if (FALSE) block is never executed when the file is
# sourced; the calls are kept as usage examples only.
# Example using the iris dataset and default ranger algorithm
iris_syn <- synthetic(iris)
# Example using lm as prediction algorithm (only works for numeric variables)
# note that, within the model_expression, a new data.frame is created because
# lm() requires a separate data argument:
# Keep only the first four columns of iris, in line with the numeric-only
# restriction mentioned above.
dat <- iris[, 1:4]
# NOTE(review): `y`, `xsynth` and `model` are not defined anywhere in this
# script; presumably synthetic() supplies them when it evaluates the two
# expressions below - confirm against the synthetic() documentation.
synthetic(dat,
model_expression = lm(.outcome ~ .,
data = data.frame(.outcome = y,
xsynth)),
predict_expression = predict(model, newdata = xsynth))
}
# Example using bootstrapping:
# Unlike the other examples, this call is not wrapped in if (FALSE), so it is
# executed when the file is sourced.
# No model is specified (model_expression = NULL); the predict_expression
# draws length(y) values from y with replacement.
# NOTE(review): `y` is not defined in this script; presumably synthetic()
# provides it when evaluating the expression - confirm.
synthetic(iris,
model_expression = NULL,
predict_expression = sample(y, size = length(y), replace = TRUE))
if (FALSE) {
# Example with missing data, no imputation
# Work on a copy so the built-in iris data set itself is left untouched.
iris_missings <- iris
# Ten times, set one randomly chosen cell (random row index, random column
# index) to NA. The same cell can be drawn more than once, so between 1 and
# 10 cells end up missing.
for(i in 1:10){
iris_missings[sample.int(nrow(iris_missings), 1, replace = TRUE),
sample.int(ncol(iris_missings), 1, replace = TRUE)] <- NA
}
# No missingness_expression is passed here.
# NOTE(review): how synthetic() treats the NA values by default is not visible
# in this script - confirm against the synthetic() documentation.
iris_miss_syn <- synthetic(iris_missings)
# Example with missing data, imputation by median/mode substitution
# First, define a simple function for median/mode substitution:
# Median/mode substitution for missing values.
#
# x: a data.frame, or an atomic vector / factor.
#
# Returns an object of the same shape as `x` in which every NA has been
# replaced:
#   * numeric (double or integer) vectors get the median of the observed
#     values;
#   * all other vectors (factor, character, logical, ...) get the most
#     frequent observed value; ties go to the value that comes first in
#     table() order.
# For a data.frame the rule is applied to each column separately, and the
# result is a data.frame with the same column names and column types.
# A vector with no NA values, or with only NA values, is returned unchanged
# because there is nothing to impute (from).
imp_fun <- function(x) {
  if (is.data.frame(x)) {
    # Assigning into x[] keeps x a data.frame and preserves each column's
    # type. sapply() would simplify the columns into a single matrix, turning
    # factors into integer codes and coercing mixed columns to one type.
    x[] <- lapply(x, imp_fun)
    return(x)
  }

  missing <- is.na(x)
  if (!any(missing) || all(missing)) {
    return(x)
  }

  observed <- x[!missing]
  if (is.numeric(x)) {
    # is.numeric() is TRUE for both double and integer vectors (and FALSE for
    # factors), so integer columns are imputed by their median as well.
    x[missing] <- median(observed)
  } else {
    counts <- table(observed)
    mode_label <- names(counts)[which.max(counts)]
    # Look the modal label up among the observed values so the replacement
    # has the same type as x (e.g. TRUE rather than the string "TRUE").
    x[missing] <- observed[match(mode_label, as.character(observed))]
  }
  x
}
# Then, call synthetic() with this function as missingness_expression:
# NOTE(review): `data` is not defined in this script; presumably it refers to
# the data set inside synthetic() at the time the expression is evaluated -
# confirm against the synthetic() documentation.
iris_miss_syn <- synthetic(iris_missings,
missingness_expression = imp_fun(data))
}
# Run the code above in your browser using DataLab