### Using a sample of simu_data dataset
### Y and Z are the same variable encoded in 2 different forms:
### (3 levels for Y and 5 levels for Z)
#--------
# Load the simu_data example dataset, then keep only rows 1 to 200 and
# 301 to 500 so that the examples run on a smaller sample.
data("simu_data")
simu_dat <- simu_data[c(seq_len(200), 301:500), ]
### An example of OUTCOME algorithm that uses:
#-----
# - A nearest neighbor procedure for the estimation of individual predictions
# - The Manhattan distance function
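# - 90% of individuals from each modality to calculate average distances
# between individuals and modalities
# NOTE(review): percent.knn is not set in the call below, so the default of
# OT_outcome applies -- confirm that it matches the 90% stated here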
# Predictions are assessed for Y in B and Z in A
#-----
# Run OT_outcome on the sample with the Manhattan distance ("M"), no
# relaxation and the "sequential" method for individual predictions.
OUTC1 <- OT_outcome(
  simu_dat,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "M",
  maxrelax = 0,
  indiv.method = "sequential"
)

# First rows of the completed database A
head(OUTC1[["DATA1_OT"]])
# First rows of the completed database B
head(OUTC1[["DATA2_OT"]])

# First rows of the first slice of the estimatorZA array
head(OUTC1[["estimatorZA"]][, , 1])
# This slice corresponds to P[Z = 1 | Y, P_1], where P_1 is the 1st profile of
# covariates, detailed in the 1st row of the profile object:
OUTC1[["profile"]][1, ] # Details of P_1
# So estimatorZA[1, 1, 1] = 0.2 corresponds to an estimation of:
# P[Z = 1|Y=[20-40],Gender_2=0,Treatment_2=1,Treatment_3=0,Smoking_2=1,Dosage=3,Age=65.44]
# We can therefore conclude that all individuals with the P_1 profile of
# covariates have a 20% chance of being assigned to the 1st level of Z in
# database A.
# The same reasoning applies to the estimatorYB object.
# \donttest{
### An example of OUTCOME algorithm with the same conditions as the previous example, except that:
# - Only the individual predictions of Y in B are required
# - The continuous covariate "age" (related index = 8) will be converted into an ordinal factor
# of 3 balanced classes (tertiles)
# - The Gower distance is now used
### -----
# OT_outcome on the sample with the Gower distance ("G"); column 8 is
# converted into 3 classes and only database "B" is requested.
OUTC2_B <- OT_outcome(
  simu_dat,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "G",
  maxrelax = 0,
  convert.num = 8,
  convert.class = 3,
  indiv.method = "sequential",
  which.DB = "B"
)
### An example of OUTCOME algorithm with the same conditions as the first example, except that:
# - Only the individual predictions of Z in A are required
# NOTE(review): the call below sets which.DB = "B", not "A" -- confirm which
# database is intended
# - The continuous covariate "age" (related index = 8) will be converted into an ordinal factor
# of 3 balanced classes (tertiles)
# - Here, the Hamming distance can be applied because, after conversion, all covariates are factors.
# Disjunctive tables of each covariate will be automatically used to work with a set of binary
# variables.
### -----
# OT_outcome with the Hamming distance ("H"); column 8 is converted into
# 3 classes and only database "B" is requested.
# NOTE(review): this call is run on the full simu_data, not on the simu_dat
# sample -- confirm this is intended.
OUTC3_B <- OT_outcome(
  simu_data,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "H",
  maxrelax = 0,
  convert.num = 8,
  convert.class = 3,
  indiv.method = "sequential",
  which.DB = "B"
)
### An example of R-OUTCOME algorithm using:
# - An optimization procedure for individual predictions on the 2 databases
# - The Manhattan distance
# - Raw covariates
### -----
# OT_outcome on the full simu_data with the Manhattan distance ("M"),
# no relaxation and the "optimal" method for individual predictions.
R_OUTC1 <- OT_outcome(
  simu_data,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "M",
  maxrelax = 0,
  indiv.method = "optimal"
)
### An example of R-OUTCOME algorithm with:
# - An optimization procedure for individual predictions on the 2 databases
# - The use of Euclidean distance on coordinates from FAMD
# - Raw covariates
### -----
# OT_outcome on the full simu_data with the Euclidean distance ("E")
# computed on FAMD coordinates (FAMD.coord = "YES", FAMD.perc = 0.8) and
# the "optimal" method for individual predictions.
R_OUTC2 <- OT_outcome(
  simu_data,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "E",
  FAMD.coord = "YES",
  FAMD.perc = 0.8,
  indiv.method = "optimal"
)
### An example of R-OUTCOME algorithm with relaxation on marginal distributions and:
# - An optimization procedure for individual predictions on the 2 databases
# - The use of the Euclidean distance
# - An arbitrary coefficient of relaxation
# - Raw covariates
#-----
# OT_outcome on the full simu_data with the Euclidean distance ("E"),
# a relaxation coefficient of 0.4 and the "optimal" method for individual
# predictions.
R_OUTC3 <- OT_outcome(
  simu_data,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  dist.choice = "E",
  maxrelax = 0.4,
  indiv.method = "optimal"
)
# }
# Run the code above in your browser using DataLab