# Load the simu_data dataset.
data(simu_data)

# ---- Example with the Manhattan distance ----
# Column indexes are passed to transfo_dist() grouped by declared type.
man1 <- transfo_dist(
  simu_data,
  quanti = c(3, 8),
  nominal = c(1, 4, 5, 7),
  ordinal = c(2, 6),
  logic = NULL,
  prep_choice = "M"
)
mat_man1 <- proxim_dist(man1, norm = "M")

# Y (Yb1) and Z (Yb2) carry the same information encoded in two different
# forms (3 levels for Y, 5 levels for Z), stored in two distinct databases,
# A and B respectively. The marginal distribution of Y in B is unknown, and
# so is the marginal distribution of Z in A.

# Empirical distribution of Y in database A (assigned and printed at once).
(freqY <- prop.table(table(man1$Y)))

# Empirical distribution of Z in database B (assigned and printed at once).
(freqZ <- prop.table(table(man1$Z)))

# Suppose the matrix below, called transport1, stands for an estimate of the
# joint distribution L(Y, Z). In reality this distribution is UNKNOWN and is
# estimated in the OT function by solving an optimisation problem.
# The values fill the matrix column by column: each line below is one column.
transport1 <- matrix(
  c(
    0.3625,     0,          0,
    0.07083333, 0.05666667, 0,
    0,          0.0875,     0,
    0,          0.1075,     0,
    0,          0.17166667, 0.1433333
  ),
  nrow = 3
)

# The marginal distributions of transport1 are meant to correspond to
# freqY (row sums) and freqZ (column sums).
apply(transport1, MARGIN = 1, FUN = sum) # = freqY
apply(transport1, MARGIN = 2, FUN = sum) # = freqZ

# The predicted values of Y in database B and of Z in database A are
# assigned and stored in the following object.
pred_man1 <- indiv_grp_closest(
  mat_man1,
  jointprobaA = transport1,
  jointprobaB = transport1,
  percent_closest = 0.90
)
summary(pred_man1)

# To predict Z in A only, add the which.DB argument.
pred_man1_A <- indiv_grp_closest(
  mat_man1,
  jointprobaA = transport1,
  jointprobaB = transport1,
  percent_closest = 0.90,
  which.DB = "A"
)
# Run the code above in your browser using DataLab