# NOT RUN {
# --- Subsampling: construct, inspect, and reconfigure ----------------------
subsampling <- rsmp("subsampling")

# Show the parameter values the resampling starts out with
subsampling$param_set$values

# Replace the parameter values: 3 repeats, each drawing 10% of the data
subsampling$param_set$values <- list(ratio = 0.1, repeats = 3)
subsampling$param_set$values

# --- Instantiate on the iris task ------------------------------------------
iris_task <- tsk("iris")
subsampling$instantiate(iris_task)

# Pull the row ids of the first train/test split; the intersection shows
# which ids (if any) the two sets have in common
train_ids <- subsampling$train_set(1)
print(train_ids)
test_ids <- subsampling$test_set(1)
intersect(train_ids, test_ids)

# --- Another example: 10-fold cross-validation on the same task ------------
cv <- rsmp("cv")
cv$instantiate(iris_task)
cv$train_set(1)

# --- Stratification --------------------------------------------------------
pima_task <- tsk("pima")

# Class proportions over the whole task (moderately unbalanced)
overall_counts <- table(pima_task$truth())
prop.table(overall_counts)

# Use the target column as stratum so that splits are stratified by class
pima_task$col_roles$stratum <- pima_task$target_names
stratified <- rsmp("subsampling")$instantiate(pima_task)

# Class proportions inside the first training set (roughly the same as above)
train_counts <- table(pima_task$truth(stratified$train_set(1)))
prop.table(train_counts)
# }
# Run the code above in your browser using DataCamp Workspace