### Assuming two distinct databases from simu_data: data_A and data_B
### Some transformations will be made beforehand on variables to generate
### heterogeneities between the two bases.
# Load the simu_data example dataset, then split it into two databases
# according to its DB column. Each database keeps a single target variable
# (column 2 for A, column 3 for B) plus the shared covariates (columns 4 to 8).
data(simu_data)
data_A <- simu_data[simu_data$DB == "A", c(2, 4:8)]
data_B <- simu_data[simu_data$DB == "B", c(3, 4:8)]

# For the example, a covariate (Weight) is added only in data_A.
# The number of draws follows the number of rows of data_A, so the assignment
# stays valid whatever the size of the database.
data_A$Weight <- rnorm(nrow(data_A), mean = 70, sd = 5)

# Be careful: the target variables must be stored as factors (or ordered
# factors) in the 2 databases. This is not the case for Yb2 in data_B,
# so it is converted explicitly here.
data_B$Yb2 <- as.factor(data_B$Yb2)

# Moreover, the Dosage covariate is stored in 3 classes in data_B (instead of
# 4 classes in data_A) to make the encoding of this covariate specific to each
# database: "Dos 1" and "Dos 2" are pooled into "D1", "Dos 3" becomes "D3" and
# every other value is labelled "D4" (missing values stay NA).
data_B$Dosage <- as.character(data_B$Dosage)
data_B$Dosage <- as.factor(
  ifelse(
    data_B$Dosage %in% c("Dos 1", "Dos 2"),
    "D1",
    ifelse(data_B$Dosage == "Dos 3", "D3", "D4")
  )
)

# For more diversity, this covariate is placed at the last column of data_B
# (the 4th column is moved to the 6th position).
data_B <- data_B[, c(1:3, 5, 6, 4)]
# Ex 1: merge the two databases and impute the incomplete covariates with MICE
merged_ex1 <- merge_dbs(
  data_A, data_B,
  NAME_Y = "Yb1",
  NAME_Z = "Yb2",
  ordinal_DB1 = c(1, 4),
  ordinal_DB2 = c(1, 6),
  impute = "MICE",
  R_MICE = 2,
  seed_choice = 3011
)
summary(merged_ex1$DB_READY)

# Ex 2: merge the two databases and keep the missing values as they are
merged_ex2 <- merge_dbs(
  data_A, data_B,
  NAME_Y = "Yb1",
  NAME_Z = "Yb2",
  ordinal_DB1 = c(1, 4),
  ordinal_DB2 = c(1, 6),
  impute = "NO",
  seed_choice = 3011
)

# Ex 3: merge the two databases while keeping only the complete cases
merged_ex3 <- merge_dbs(
  data_A, data_B,
  NAME_Y = "Yb1",
  NAME_Z = "Yb2",
  ordinal_DB1 = c(1, 4),
  ordinal_DB2 = c(1, 6),
  impute = "CC",
  seed_choice = 3011
)

# Ex 4: merge the two databases and impute the incomplete covariates with FAMD
merged_ex4 <- merge_dbs(
  data_A, data_B,
  NAME_Y = "Yb1",
  NAME_Z = "Yb2",
  ordinal_DB1 = c(1, 4),
  ordinal_DB2 = c(1, 6),
  impute = "FAMD",
  NCP_FAMD = 4,
  seed_choice = 2096
)

# Conclusion:
# - The data fusion is successful in each situation.
# - The Dosage and Weight covariates have been excluded from the fusion,
#   as expected.
# - The covariates have been imputed when required.
# Run the code above in your browser using DataLab