# Load the `vi` example dataset.
data(vi)

# Keep only the first 1000 rows to limit example run time.
vi <- vi[seq_len(1000L), ]
# Mean encoding ----
# Encode the predictor "soil_type" against the response "vi_numeric" with
# target_encoding_mean(). Only the data, response, predictor and output
# column name are supplied (the "without noise" variant of the example).
df <- target_encoding_mean(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

# Scatter plot of the response against the newly encoded column.
plot(
  x = df[["soil_type_encoded"]],
  y = df[["vi_numeric"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Group rank ----
# Encode the predictor "soil_type" against the response "vi_numeric" with
# target_encoding_rank(); the result overwrites `df`.
df <- target_encoding_rank(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

# Scatter plot of the response against the newly encoded column.
plot(
  x = df[["soil_type_encoded"]],
  y = df[["vi_numeric"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Leave-one-out ----
# Encode the predictor "soil_type" against the response "vi_numeric" with
# target_encoding_loo(). Only the data, response, predictor and output
# column name are supplied (the "without noise" variant of the example).
df <- target_encoding_loo(
  df = vi,
  response = "vi_numeric",
  predictor = "soil_type",
  encoded_name = "soil_type_encoded"
)

# Scatter plot of the response against the newly encoded column.
plot(
  x = df[["soil_type_encoded"]],
  y = df[["vi_numeric"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Run the code above in your browser using DataLab