## Default method:
h2o.glm(x, y, data, family, nfolds = 10, alpha = 0.5, lambda = 1e-5, epsilon = 1e-4,
standardize = TRUE, prior, tweedie.p = ifelse(family == 'tweedie', 1.5,
as.numeric(NA)), thresholds, iter.max, higher_accuracy, lambda_search, version = 2)
## Run GLM on a ValueArray object:
h2o.glm.VA(x, y, data, family, nfolds = 10, alpha = 0.5, lambda = 1e-5, epsilon = 1e-4,
standardize = TRUE, prior, tweedie.p = ifelse(family == 'tweedie', 1.5,
as.numeric(NA)), thresholds = ifelse(family == 'binomial', seq(0, 1, 0.01),
as.numeric(NA)))
## Run GLM on a FluidVecs object:
h2o.glm.FV(x, y, data, family, nfolds = 10, alpha = 0.5, lambda = 1e-5, epsilon = 1e-4,
standardize = TRUE, prior, tweedie.p = ifelse(family == 'tweedie', 1.5,
as.numeric(NA)), iter.max = 100, higher_accuracy = FALSE, lambda_search = FALSE)
data: An H2OParsedDataVA (version = 1) or H2OParsedData (version = 2) object containing the variables in the model.
alpha: The elastic-net mixing parameter; alpha = 1 is the lasso penalty.
lambda: The shrinkage parameter; the larger lambda is, the more the coefficients are shrunk toward zero (and each other).
prior: Prior probability of class 1. Only used if family = "binomial". When omitted, prior will default to the frequency of class 1 in the response column.
tweedie.p: The index of the power variance function for the Tweedie distribution. Only used if family = "tweedie".
lambda_search: Whether to search over a range of lambda values. When this is set to TRUE, lambda will be interpreted as lambda_min.
version: The version of GLM to run. If version = 1, this will run the more stable ValueArray implementation, while version = 2 runs the faster, but still beta stage FluidVecs implementation.
Value: An object of class H2OGLMModelVA (version = 1) or H2OGLMModel (version = 2) with slots key, data, model and xval. The slot model is a list of the following components: (the component list is missing from this extract).
The slot xval is a list of H2OGLMModel or H2OGLMModelVA objects representing the cross-validation models. (Each of these objects themselves has xval equal to an empty list.)
Details: To run GLM with version = 1, you must import data to a ValueArray object using h2o.importFile.VA, h2o.importFolder.VA or one of its variants. To run with version = 2, you must import data to a FluidVecs object using h2o.importFile.FV, h2o.importFolder.FV or one of its variants.
See also: h2o.importFile, h2o.importFolder, h2o.importHDFS, h2o.importURL, h2o.uploadFile
# Example: fit two GLMs on the prostate data set through the h2o package.
library(h2o)

# Handle to the H2O instance at localhost:54321 (startH2O = TRUE is passed
# through to h2o.init).
localH2O <- h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)

# Import prostate.csv by URL and store it under the key "prostate.hex".
prostate.hex <- h2o.importURL(
  localH2O,
  path = paste(
    "https://raw.github.com",
    "0xdata/h2o/master/smalldata/logreg/prostate.csv",
    sep = "/"
  ),
  key = "prostate.hex"
)

# Run GLM of CAPSULE ~ AGE + RACE + PSA + DCAPS
# (binomial family, 10 cross-validation folds).
h2o.glm(
  x = c("AGE", "RACE", "PSA", "DCAPS"),
  y = "CAPSULE",
  data = prostate.hex,
  family = "binomial",
  nfolds = 10,
  alpha = 0.5
)

# Run GLM of VOL ~ CAPSULE + AGE + RACE + PSA + GLEASON
# Predictors are every column except the ID, DPROS, DCAPS and the response VOL.
myX <- setdiff(colnames(prostate.hex), c("ID", "DPROS", "DCAPS", "VOL"))
h2o.glm(
  x = myX,
  y = "VOL",
  data = prostate.hex,
  family = "gaussian",
  nfolds = 5,
  alpha = 0.1
)
Run the code above in your browser using DataLab