# NOT RUN {
# Load the `dataM` data set into the workspace.
data(dataM)
# magrittr is attached for the pipe operators used in the example below.
library(magrittr)
# dplyr is attached for the data-manipulation verbs used in the example below
# (mutate(), case_when(), filter(), select()).
library(dplyr)
# This part of the example needs two optional packages: 'recipes' (for the
# imputation) and 'Stat2Data' (for the MathPlacement data). It is skipped when
# either one is not installed. `&&` is the scalar, short-circuiting operator
# intended for `if` conditions.
if (requireNamespace("recipes", quietly = TRUE) &&
    requireNamespace("Stat2Data", quietly = TRUE)) {
  data("MathPlacement", package = "Stat2Data")
  # Intermediate expressions inside `{ }` are not auto-printed, so the preview
  # of the raw data has to be printed explicitly.
  print(head(MathPlacement))
  library(recipes)

  # As some of the data is missing, k-nearest neighbors (knn) imputation is
  # used to fill the gaps. This is done with the recipes package and the
  # function step_impute_knn() (called step_knnimpute() before recipes 0.1.16).
  #
  # Afterwards we create a categorical variable that shows whether a student
  # took a course which was too high, too low, the recommended one or
  # something else happened. case_when() uses the first matching condition,
  # so the order of the conditions matters.
  #
  # Finally we remove observations with ambiguous course status ("dother").
  dataM <- recipe(~ ., data = MathPlacement) %>%
    step_impute_knn(everything()) %>%
    prep() %>%
    juice() %>%
    mutate(
      # Sequential student id; row_number() is also safe for zero rows.
      Student = row_number(),
      DR_Course = case_when(
        TooHigh == 1 ~ "chigh",
        TooLow == 1 ~ "alow",
        RecTaken == 1 ~ "bnormal",
        TRUE ~ "dother"
      )
    ) %>%
    filter(DR_Course != "dother")

  # Show how many students fall into each remaining course category.
  dataM %>%
    select(DR_Course) %>%
    table() %>%
    t() %>%
    print()
}
# }
# Run the code above in your browser using DataLab