library(h2o)
# Start (or attach to) an H2O instance and keep the handle for the upload below.
localH2O <- h2o.init()

# Work on a copy of iris and blank out 50 randomly chosen rows in each of its
# five columns. Rows are drawn independently per column, in column order.
ds <- iris
for (col_idx in seq_along(ds)) {
  ds[sample(nrow(ds), 50), col_idx] <- NA
}

# Push the NA-riddled data frame to H2O and peek at the first rows.
hex <- as.h2o(localH2O, ds)
head(hex)

# NOTE(review): none of the h2o.impute() results below are assigned, so the
# example presumably relies on h2o.impute() updating `hex` in place -- confirm
# against the installed h2o version.

# Fill the missing Sepal.Length values with the column median.
h2o.impute(hex, .(Sepal.Length), method = "median")

# Fill column 2 with the mean computed within groups defined by
# Sepal.Length, Petal.Width and Species.
h2o.impute(
  hex,
  2,
  method = "mean",
  groupBy = .(Sepal.Length, Petal.Width, Species)
)

# Fill the Species column (column 5) with the mode computed within groups
# defined by columns 1 and 4.
h2o.impute(hex, 5, method = "mode", groupBy = c(1, 4))
# Run the code above in your browser using DataLab