mlr3oml

mlr3oml lets you create mlr3 tasks directly from OpenML data sets. Furthermore, you can also obtain the data and the resampling for a given OpenML task. Caching can be enabled by setting the option "mlr3oml.cache". Uploading to OpenML is currently not supported; use the OpenML package for this.

Short Demo

library("mlr3")
library("mlr3oml")

# new parametrized task "oml"
tsk("oml", data_id = 31)

## INFO  [15:05:51.231] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}
## INFO  [15:05:51.378] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:51.446] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:credit-g> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since

tsk("oml", task_id = 59)

## INFO  [15:05:51.552] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.589] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.621] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <TaskClassif:Task 59: iris (Supervised Classification)> (150 x 5)
## * Target: class
## * Properties: multiclass
## * Features (4):
##   - dbl (4): petallength, petalwidth, sepallength, sepalwidth

# same for resampling
rsmp("oml", task_id = 59)

## INFO  [15:05:51.688] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.727] Downloading ARFF {url: https://www.openml.org/api_splits/get/59/Task_59_splits.arff}
## INFO  [15:05:51.784] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.818] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()

# R6 class for data sets
oml_data = OMLData$new(61)
oml_data$name

## INFO  [15:05:51.862] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}

## [1] "iris"

oml_data$nrow

## INFO  [15:05:51.904] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/61}

## [1] 150

oml_data$ncol

## INFO  [15:05:51.956] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/61}

## [1] 5

oml_data$data

## INFO  [15:05:52.002] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

##      sepallength sepalwidth petallength petalwidth          class
##   1:         5.1        3.5         1.4        0.2    Iris-setosa
##   2:         4.9        3.0         1.4        0.2    Iris-setosa
##   3:         4.7        3.2         1.3        0.2    Iris-setosa
##   4:         4.6        3.1         1.5        0.2    Iris-setosa
##   5:         5.0        3.6         1.4        0.2    Iris-setosa
##  ---                                                             
## 146:         6.7        3.0         5.2        2.3 Iris-virginica
## 147:         6.3        2.5         5.0        1.9 Iris-virginica
## 148:         6.5        3.0         5.2        2.0 Iris-virginica
## 149:         6.2        3.4         5.4        2.3 Iris-virginica
## 150:         5.9        3.0         5.1        1.8 Iris-virginica

# R6 class for tasks
oml_task = OMLTask$new(31)
oml_task$name

## INFO  [15:05:52.062] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/31}

## [1] "Task 31: credit-g (Supervised Classification)"

oml_task$nrow

## INFO  [15:05:52.111] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/31}

## [1] 1000

oml_task$ncol

## INFO  [15:05:52.162] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:52.205] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}

## [1] 21

oml_task$task

## INFO  [15:05:52.251] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:Task 31: credit-g (Supervised Classification)> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since

oml_task$resampling

## INFO  [15:05:52.338] Downloading ARFF {url: https://www.openml.org/api_splits/get/31/Task_31_splits.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()

# list oml data sets with 5 features and 50 - 200 instances
tab = list_oml_data_sets(number_features = 5, number_instances = c(50, 200))

## INFO  [15:05:52.406] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000}
## INFO  [15:05:52.610] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000/offset/1000}

head(tab[, .(data_id, name)])

##    data_id                    name
## 1:      61                    iris
## 2:     199                fruitfly
## 3:     214                baskball
## 4:     329              hayes-roth
## 5:     346                    aids
## 6:     551 analcatdata_michiganacc

# list first 10 oml tasks
tab = list_oml_tasks(limit = 10)

## INFO  [15:05:52.799] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/list/limit/10}

tab[, .(task_id, data_id, name)]

##     task_id data_id            name
##  1:       2       2          anneal
##  2:       3       3        kr-vs-kp
##  3:       4       4           labor
##  4:       5       5      arrhythmia
##  5:       6       6          letter
##  6:       7       7       audiology
##  7:       8       8 liver-disorders
##  8:       9       9           autos
##  9:      10      10           lymph
## 10:      11      11   balance-scale

mlr3oml

Short Demo

Copy Link

Version

Install

Monthly Downloads

Version

License

Issues

Pull Requests

Stars

Forks

Repository

Homepage

Maintainer

Last Published

Functions in mlr3oml (0.2.0)