# Load the example dataset `vi` into the workspace.
data("vi")

# Keep only the first 1000 rows to limit example run time.
vi <- vi[seq_len(1000), ]
# Mean encoding ----

# Without noise: encode `soil_type` against the response `vi_mean`.
# NOTE(review): `replace = TRUE` presumably overwrites `soil_type` with its
# encoded values, since the plot below reads that column as the encoded
# variable -- confirm against the package documentation.
df <- target_encoding_mean(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Mean encoding with noise: same call as the noise-free variant, but with
# `white_noise = 0.1` passed to the encoder.
df <- target_encoding_mean(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  white_noise = 0.1,
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Group rank ----

# Encode `soil_type` against `vi_mean` with the rank-based encoder.
df <- target_encoding_rank(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Leave-one-out ----

# Without noise: encode `soil_type` against `vi_mean` with the
# leave-one-out encoder.
df <- target_encoding_loo(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Leave-one-out encoding with noise: same call as the noise-free variant,
# but with `white_noise = 0.1` passed to the encoder.
df <- target_encoding_loo(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  white_noise = 0.1,
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# rnorm ----

# Without sd multiplier: encode `soil_type` against `vi_mean` with the
# rnorm encoder, leaving `rnorm_sd_multiplier` at its default.
df <- target_encoding_rnorm(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# rnorm encoding with sd multiplier: same call as the default variant, but
# with `rnorm_sd_multiplier = 0.1` passed to the encoder.
df <- target_encoding_rnorm(
  df = vi,
  response = "vi_mean",
  predictor = "soil_type",
  rnorm_sd_multiplier = 0.1,
  replace = TRUE
)

# Encoded predictor (x axis) against the response (y axis).
plot(
  x = df[["soil_type"]],
  y = df[["vi_mean"]],
  xlab = "encoded variable",
  ylab = "response"
)
# Run the code above in your browser using DataLab