Learn R Programming

smartdata (version 1.0.3)

impute_missing: Missing values imputation wrapper

Description

Missing values imputation wrapper

Usage

impute_missing(dataset, method, exclude = NULL, ...)

Arguments

dataset

The dataset whose missing values are to be imputed

method

selected method of missing values imputation

exclude

character. Vector of attributes to exclude from the missing values treatment

...

Further arguments for method

Value

The treated dataset (with its missing values imputed)

Examples

Run this code
# NOT RUN {
# Load smartdata and the example datasets used below; each dataset comes
# from the package named in its data() call
library("smartdata")
data(africa, package = "Amelia")
data(nhanes, package = "mice")
data(ozone,  package = "missMDA")
data(vnf,    package = "missMDA")
data(orange, package = "missMDA")
data(sleep,  package = "VIM")

# "gibbs_sampling" method: first on the whole dataset, then leaving the
# "chl" attribute out of the missing values treatment
super_nhanes <- impute_missing(nhanes, "gibbs_sampling")
super_nhanes <- impute_missing(nhanes, "gibbs_sampling", exclude = "chl")
# Use a different method for every column
impute_methods <- c("pmm", "midastouch", "norm_nob", "norm_boot")
super_nhanes <- impute_missing(nhanes, "gibbs_sampling", imputation = impute_methods)
# Other methods are selected by changing the method name
super_nhanes <- impute_missing(nhanes, "central_imputation")
super_africa <- impute_missing(africa, "knn_imputation")
# Execute knn imputation with a non-default value for k
super_africa <- impute_missing(africa, "knn_imputation", k = 5)
super_africa <- impute_missing(africa, "expect_maximization", exclude = "country")
# Method-specific arguments are passed through '...'
super_africa <- impute_missing(africa, "rf_imputation", num_iterations = 15,
                               num_trees = 200, bootstrap = FALSE)
# Examples of calls to 'PCA imputation' with wholly numeric datasets
# }
# NOT RUN {
super_orange <- impute_missing(orange, "PCA_imputation", num_dimensions = 5,
                               imputation = "EM")
super_orange <- impute_missing(orange, "PCA_imputation", num_dimensions = 5,
                               imputation = "Regularized")
# }
# NOT RUN {
super_orange <- impute_missing(orange, "PCA_imputation", num_dimensions = 5,
                               imputation = "Regularized", random_init = TRUE)
# Examples of calls to 'MCA imputation' with wholly categorical datasets
# }
# NOT RUN {
super_vnf    <- impute_missing(vnf, "MCA_imputation", num_dimensions = 5,
                               imputation = "EM")
super_vnf    <- impute_missing(vnf, "MCA_imputation", num_dimensions = 5,
                               imputation = "Regularized")
# }
# NOT RUN {
super_vnf    <- impute_missing(vnf, "MCA_imputation", num_dimensions = 5,
                               imputation = "Regularized", random_init = TRUE)
# Examples of calls to 'FAMD imputation' with hybrid datasets
# }
# NOT RUN {
super_ozone  <- impute_missing(ozone, "FAMD_imputation", num_dimensions = 5,
                               imputation = "EM", exclude = c("Ne12", "Vx15"))
super_ozone  <- impute_missing(ozone, "FAMD_imputation", num_dimensions = 5,
                               imputation = "Regularized")
# }
# NOT RUN {
super_ozone  <- impute_missing(ozone, "FAMD_imputation", num_dimensions = 5,
                               imputation = "Regularized", random_init = TRUE)

# Examples of hotdeck, iterative robust and regression imputations
super_sleep <- impute_missing(sleep, "hotdeck")
super_sleep <- impute_missing(sleep, "iterative_robust", initialization = "median",
                              num_iterations = 1000)
# The formula names the attributes to impute (left-hand side) and the
# attributes used as regressors (right-hand side) -- TODO confirm against
# the method's documentation
super_sleep <- impute_missing(sleep, "regression_imputation",
                              formula = Dream+NonD~BodyWgt+BrainWgt)

# Examples of adaptive shrinkage imputation ("ATN" method)
super_ozone <- impute_missing(ozone, "ATN", sigma = 2.2)
super_ozone <- impute_missing(ozone, "ATN", lambda = 0.025, gamma = 2.5)
super_ozone <- impute_missing(ozone, "ATN", tune = "SURE")

# }

Run the code above in your browser using DataLab