### Using the table simu_data:
data(simu_data)
# 1. the Euclidean distance (same output with Manhattan distance),
eucl1 <- transfo_dist(simu_data,
quanti = c(3, 8), nominal = c(1, 4:5, 7),
ordinal = c(2, 6), logic = NULL, prep_choice = "E"
)
# Here Yb2 was stored in numeric: It has been automatically converted in factor
# You can also convert beforehand Yb2 in ordered factor by example:
sim_data <- simu_data
sim_data$Yb2 <- as.ordered(sim_data$Yb2)
eucl2 <- transfo_dist(sim_data,
quanti = 8, nominal = c(1, 4:5, 7),
ordinal = c(2, 3, 6), logic = NULL, prep_choice = "E"
)
# 2. The Euclidean distance generated on principal components
# by a factor analysis for mixed data (FAMD):
eucl_famd <- transfo_dist(simu_data,
quanti = c(3, 8), nominal = c(1, 4:5, 7),
ordinal = c(2, 6), logic = NULL, prep_choice = "FAMD"
)
# Please notice that this method works only with rows that have complete
# information on covariates.
# 3. The Gower distance for mixed data:
gow1 <- transfo_dist(simu_data,
quanti = c(3, 8), nominal = c(1, 4:5, 7),
ordinal = c(2, 6), logic = NULL, prep_choice = "G"
)
# 4. The Hamming distance:
# Here the quanti option could only contain indexes related to targets.
# Column indexes related to potential binary covariates or covariates with
# finite number of values must be include in the ordinal option.
# So in simu_data, the discretization of the variable age is required (index=8),
# using the convert_num and convert_class arguments (for tertiles = 3):
ham1 <- transfo_dist(simu_data,
quanti = c(3, 8), nominal = c(1, 4:5, 7), ordinal = c(2, 6),
convert_num = 8, convert_class = 3, prep_choice = "H"
)
### This function works whatever the order of your columns in your database:
# Suppose that we re-order columns in simu_data:
simu_data2 <- simu_data[, c(2, 4:7, 3, 8, 1)]
# By changing the corresponding indexes in the index_DB_Y_Z argument,
# we observe the desired output:
eucl3 <- transfo_dist(simu_data2,
index_DB_Y_Z = c(8, 1, 6), quanti = 6:7, nominal = c(2:3, 5, 8),
ordinal = c(1, 4), logic = NULL, prep_choice = "E"
)
Run the code above in your browser using DataLab