# \donttest{
# Build an example data frame that mixes numeric (x1, x4), nominal (x2, x3),
# and ordinal (x5, x6) columns.
levels <- c("Low", "Medium", "High")
df <- data.frame(
  x1 = runif(100, min = 0, max = 100),
  x2 = factor(sample(c("A", "B", "C"), size = 100, replace = TRUE)),
  x3 = factor(sample(c("A", "B", "C"), size = 100, replace = TRUE)),
  x4 = rnorm(100, mean = 10, sd = 3),
  x5 = factor(
    sample(levels, size = 100, replace = TRUE),
    levels = levels,
    ordered = TRUE
  ),
  x6 = factor(
    sample(levels, size = 100, replace = TRUE),
    levels = levels,
    ordered = TRUE
  )
)

# Minimal call: only the data frame is supplied. The bandwidth specification
# technique then defaults to mscv and the default kernel functions are used.
s1 <- kss(df = df)
# s1$bandwidths holds the bandwidths obtained by mscv.
# s1$similarities holds the similarity matrix.

# Bandwidths via the np package, which has only a few continuous and ordinal
# kernels to choose from; using the default kernel functions is recommended.
s2 <- kss(df = df, bw = "np") # defaults to npmethod "cv.ml"

# Precomputed bandwidths, one entry per column of df. Requirements:
#   - continuous variables: bandwidths > 0
#   - ordinal variables: bandwidths in [0, 1]
#   - nominal variables: u_aitken requires bandwidths in [0, 1], and
#     u_aitchisonaitken requires bandwidths in [0, (c - 1) / c], where c is
#     the number of unique values in the i-th column of df
# Any bandwidth outside its range results in a warning message.
bw_vec <- c(
  1.0, # x1 (continuous)
  0.5, # x2 (nominal)
  0.5, # x3 (nominal)
  5.0, # x4 (continuous)
  0.3, # x5 (ordinal)
  0.3  # x6 (ordinal)
)
s3 <- kss(df = df, bw = bw_vec)

# User-specified kernel functions combined with the "cv.ls" method from np.
s4 <- kss(
  df = df,
  bw = "np",
  npmethod = "cv.ls",
  cFUN = "c_epanechnikov",
  uFUN = "u_aitken",
  oFUN = "o_wangvanryzin"
)
# }
# Run the code above in your browser using DataLab