⚠️There's a newer version (0.9.0) of this package. Take me there.

mlr3oml

mlr3oml lets you create mlr3 tasks directly from OpenML data sets. You can also obtain the data and the resampling for a given OpenML task. Caching can be enabled by setting the option "mlr3oml.cache". Uploading to OpenML is currently not supported; use the OpenML package for this.

Short Demo

library("mlr3")
library("mlr3oml")

# new parametrized task "oml"
tsk("oml", data_id = 31)
## INFO  [15:05:51.231] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}
## INFO  [15:05:51.378] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:51.446] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:credit-g> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since
tsk("oml", task_id = 59)
## INFO  [15:05:51.552] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.589] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.621] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <TaskClassif:Task 59: iris (Supervised Classification)> (150 x 5)
## * Target: class
## * Properties: multiclass
## * Features (4):
##   - dbl (4): petallength, petalwidth, sepallength, sepalwidth
# same for resampling
rsmp("oml", task_id = 59)
## INFO  [15:05:51.688] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.727] Downloading ARFF {url: https://www.openml.org/api_splits/get/59/Task_59_splits.arff}
## INFO  [15:05:51.784] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.818] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()
# R6 class for data sets
oml_data = OMLData$new(61)
oml_data$name
## INFO  [15:05:51.862] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}

## [1] "iris"
oml_data$nrow
## INFO  [15:05:51.904] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/61}

## [1] 150
oml_data$ncol
## INFO  [15:05:51.956] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/61}

## [1] 5
oml_data$data
## INFO  [15:05:52.002] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

##      sepallength sepalwidth petallength petalwidth          class
##   1:         5.1        3.5         1.4        0.2    Iris-setosa
##   2:         4.9        3.0         1.4        0.2    Iris-setosa
##   3:         4.7        3.2         1.3        0.2    Iris-setosa
##   4:         4.6        3.1         1.5        0.2    Iris-setosa
##   5:         5.0        3.6         1.4        0.2    Iris-setosa
##  ---                                                             
## 146:         6.7        3.0         5.2        2.3 Iris-virginica
## 147:         6.3        2.5         5.0        1.9 Iris-virginica
## 148:         6.5        3.0         5.2        2.0 Iris-virginica
## 149:         6.2        3.4         5.4        2.3 Iris-virginica
## 150:         5.9        3.0         5.1        1.8 Iris-virginica
# R6 class for tasks
oml_task = OMLTask$new(31)
oml_task$name
## INFO  [15:05:52.062] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/31}

## [1] "Task 31: credit-g (Supervised Classification)"
oml_task$nrow
## INFO  [15:05:52.111] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/31}

## [1] 1000
oml_task$ncol
## INFO  [15:05:52.162] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:52.205] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}

## [1] 21
oml_task$task
## INFO  [15:05:52.251] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:Task 31: credit-g (Supervised Classification)> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since
oml_task$resampling
## INFO  [15:05:52.338] Downloading ARFF {url: https://www.openml.org/api_splits/get/31/Task_31_splits.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()
# list oml data sets with 5 features and 50 - 200 instances
tab = list_oml_data_sets(number_features = 5, number_instances = c(50, 200))
## INFO  [15:05:52.406] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000}
## INFO  [15:05:52.610] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000/offset/1000}
head(tab[, .(data_id, name)])
##    data_id                    name
## 1:      61                    iris
## 2:     199                fruitfly
## 3:     214                baskball
## 4:     329              hayes-roth
## 5:     346                    aids
## 6:     551 analcatdata_michiganacc
# list first 10 oml tasks
tab = list_oml_tasks(limit = 10)
## INFO  [15:05:52.799] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/list/limit/10}
tab[, .(task_id, data_id, name)]
##     task_id data_id            name
##  1:       2       2          anneal
##  2:       3       3        kr-vs-kp
##  3:       4       4           labor
##  4:       5       5      arrhythmia
##  5:       6       6          letter
##  6:       7       7       audiology
##  7:       8       8 liver-disorders
##  8:       9       9           autos
##  9:      10      10           lymph
## 10:      11      11   balance-scale

Copy Link

Version

Down Chevron

Install

install.packages('mlr3oml')

Monthly Downloads

464

Version

0.2.0

License

LGPL-3

Issues

Pull Requests

Stars

Forks

Maintainer

Last Published

August 24th, 2020

Functions in mlr3oml (0.2.0)