# NOT RUN {
# A simple example: to split the dataset "X" into "X_train" and "X_test",
# with 60% of the data in the training set and 40% of the data in the
# test set, we could run
# }
# NOT RUN {
# Hold out 40% of "X" as the test set; the remaining 60% is the training set.
output <- preprocess_split(input = X, test_ratio = 0.4)
X_test <- output[["test"]]
X_train <- output[["training"]]
# }
# NOT RUN {
# By default the dataset is shuffled and split. To avoid shuffling the
# data, provide the "no_shuffle" option; for example, to split without
# shuffling, we could run
# }
# NOT RUN {
# Same 40% test split of "X", with no_shuffle = TRUE to avoid shuffling.
output <- preprocess_split(
  input = X,
  test_ratio = 0.4,
  no_shuffle = TRUE
)
X_test <- output[["test"]]
X_train <- output[["training"]]
# }
# NOT RUN {
# If we had a dataset "X" and associated labels "y", and we wanted to split
# these into "X_train", "y_train", "X_test", and "y_test", with 30% of the
# data in the test set, we could run
# }
# NOT RUN {
# Split "X" and its labels "y" in one call, with 30% of the data in the
# test set.
output <- preprocess_split(
  input = X,
  input_labels = y,
  test_ratio = 0.3
)

# Training partition: data and matching labels.
X_train <- output[["training"]]
y_train <- output[["training_labels"]]

# Test partition: data and matching labels.
X_test <- output[["test"]]
y_test <- output[["test_labels"]]
# }
# Run the code above in your browser using DataLab