# NOT RUN {
# generate random data
Data <- matrix(runif(800), nrow=100, ncol=80,
dimnames = list(paste0("G",1:100), paste0("S",1:80)))
# generate random labels
L <- sample(x = c("A","B","C","D"), size = 80, replace = TRUE)
# generate random platform labels
P <- sample(c("P1","P2","P3"), size = 80, replace = TRUE)
# create data object
object <- ReadData(Data = Data,
Labels = L,
Platform = P,
verbose = FALSE)
# sort genes
genes_RF <- sort_genes_RF(data_object = object,
seed=123456, verbose = FALSE)
# to get an idea of how many genes we will use
# and how many rules will be generated
# summary_genes_RF(sorted_genes_RF = genes_RF,
# genes_altogether = c(10,20,50,100,150,200),
# genes_one_vs_rest = c(10,20,50,100,150,200))
# creat and sort rules
# rules_RF <- sort_rules_RF(data_object = object,
# sorted_genes_RF = genes_RF,
# genes_altogether = 100,
# genes_one_vs_rest = 100,
# seed=123456,
# verbose = FALSE)
# parameters <- data.frame(
# gene_repetition=c(3,2,1),
# rules_one_vs_rest=0,
# rules_altogether=c(2,3,10),
# run_boruta=c(FALSE,"produce_error",FALSE),
# plot_boruta = FALSE,
# num.trees=c(100,200,300),
# stringsAsFactors = FALSE)
# parameters
# test <- optimize_RF(data_object = object,
# sorted_rules_RF = rules_RF,
# test_object = NULL,
# overall = c("Accuracy"),
# byclass = NULL, verbose = FALSE,
# parameters = parameters)
# test
# test$summary[which.max(test$summary$Accuracy),]
#
# # train the final model
# # it is preferred to increase the number of trees and rules in case you have
# # large number of samples and features
# # for quick example, we have small number of trees and rules here
# # based on the optimize_RF results we will select the parameters
# RF_classifier <- train_RF(data_object = object,
# gene_repetition = 1,
# rules_altogether = 0,
# rules_one_vs_rest = 10,
# run_boruta = FALSE,
# plot_boruta = FALSE,
# probability = TRUE,
# num.trees = 300,
# sorted_rules_RF = rules_RF,
# boruta_args = list(),
# verbose = TRUE)
#
# # training accuracy
# # get the prediction labels
# # if the classifier trained using probability = FALSE
# training_pred <- RF_classifier$RF_scheme$RF_classifier$predictions
# if (is.factor(training_pred)) {
# x <- as.character(training_pred)
# }
#
# # if the classifier trained using probability = TRUE
# if (is.matrix(training_pred)) {
# x <- colnames(training_pred)[max.col(training_pred)]
# }
#
# # training accuracy
# caret::confusionMatrix(data =factor(x),
# reference = factor(object$data$Labels),
# mode = "everything")
# not to run
# visualize the binary rules in training dataset
# plot_binary_RF(Data = object,
# classifier = RF_classifier,
# prediction = NULL, as_training = TRUE,
# show_scores = TRUE,
# top_anno = "ref",
# show_predictions = TRUE,
# title = "Training data")
# not to run
# predict
# test_object # any test data
# results <- predict_RF(classifier = RF_classifier, impute = TRUE,
# Data = test_object)
#
# # visualize the binary rules in training dataset
# plot_binary_RF(Data = test_object,
# classifier = RF_classifier,
# prediction = results, as_training = FALSE,
# show_scores = TRUE,
# top_anno = "ref",
# show_predictions = TRUE,
# title = "Test data")
# }
Run the code above in your browser using DataLab