# \donttest{
# Example data frame with mixed data types: continuous (x1, x4),
# nominal (x2, x3), and ordinal (x5, x6) columns.
levels <- c("Low", "Medium", "High")
df <- data.frame(
  x1 = runif(100, 0, 100),
  x2 = factor(sample(c("A", "B", "C"), 100, replace = TRUE)),
  x3 = factor(sample(c("A", "B", "C"), 100, replace = TRUE)),
  x4 = rnorm(100, 10, 3),
  x5 = ordered(sample(levels, 100, replace = TRUE), levels = levels),
  x6 = ordered(sample(levels, 100, replace = TRUE), levels = levels)
)

# Minimal call: only the data frame is required. The bandwidth selection
# defaults to the "mscv" technique and the default kernel functions are used.
d1 <- dkss(df = df)
# d1$bandwidths holds the bandwidths obtained by mscv
# d1$distances holds the distance matrix

# Bandwidth selection through the np package, which offers only a few
# continuous and ordinal kernels to choose from; using the default kernel
# functions is recommended here.
d2 <- dkss(df = df, bw = "np")

# Precomputed bandwidths, one per column of df (in column order).
# Valid ranges:
#   - continuous variables: bandwidth > 0
#   - ordinal variables:    bandwidth in [0, 1]
#   - nominal variables:    u_aitken requires bandwidth in [0, 1];
#                           u_aitchisonaitken requires [0, (c - 1) / c],
#                           where c is the number of unique values in the
#                           i-th column of df
# Any bandwidth outside its range results in a warning message.
bw_vec <- c(1.0, 0.5, 0.5, 5.0, 0.3, 0.3)
d3 <- dkss(df = df, bw = bw_vec)

# User-specified kernel functions for the continuous (cFUN), nominal (uFUN),
# and ordinal (oFUN) variables.
d5 <- dkss(
  df = df,
  bw = "mscv",
  cFUN = "c_epanechnikov",
  uFUN = "u_aitken",
  oFUN = "o_habbema"
)
# }
# Run the code above in your browser using DataLab