####################################
######### Artificial data ##########
####################################
### Set the Java heap option first, then attach the rmcfs package.
options(java.parameters = "-Xmx4g")
library(rmcfs)

# Create the artificial input data (requesting 10 random features)
# and display it.
adata <- artificial.data(rnd.features = 10)
showme(adata)

# Run the MCFS-ID procedure on the artificial data, using `class` as the
# response and all remaining columns as predictors.
result <- mcfs(
  class ~ .,
  adata,
  projections = 100,
  projectionSize = 4,
  cutoffPermutations = 3,
  finalCV = FALSE,
  finalRuleset = TRUE,
  threadsNumber = 2
)
# Basic summary of the mcfs result.
print(result)

# Cutoff values reported for all methods.
print(result$cutoff)

# The single cutoff value that the plots below rely on.
print(result$cutoff_value)

# Distances between subsequent projections (MCFS-ID convergence
# statistics): plot them, then print the underlying values.
plot(result, type = "distances")
print(result$distances)

# The 50 most important features: plot the RI ranking, plot it again with
# the max RI values from the permutation experiment, then print the rows.
plot(result, type = "ri", size = 50)
plot(result, type = "ri", size = 50, plot_permutations = TRUE)
print(head(result$RI, n = 50))

# The 50 strongest feature interdependencies: plot, then print.
plot(result, type = "id", size = 50)
print(head(result$ID, n = 50))

# Features ordered by RI_norm. 'size' is the number of top features shown
# in the chart; here it is set slightly above cutoff_value.
plot(
  result,
  type = "features",
  size = result$cutoff_value * 1.1,
  cex = 1
)

# Same chart with 'size' fixed at 10.
plot(result, type = "features", size = 10)

# CV classification result obtained on top features (left disabled here).
# In the middle of the x axis a red label denotes cutoff_value.
# plot(result, type = "cv", measure = "wacc", cex = 0.8)

# Confusion matrix: the result of all classifications performed by all
# decision trees on all s*t datasets.
plot(result, type = "cmatrix")
# Build the interdependencies graph with all default parameters and plot it.
gid <- build.idgraph(result)
plot(gid)

# Rebuild the graph for the top 6 features and the top 12 interdependencies,
# plotting all nodes.
gid <- build.idgraph(result, size = 6, size_ID = 12, plot_all_nodes = TRUE)
plot(gid, label.dist = 1)

# Export the graph to GraphML (XML structure) in the session's temporary
# directory. write_graph() replaces the dotted write.graph() name, which
# igraph has deprecated; file.path() builds the target path.
path <- tempdir()
igraph::write_graph(
  gid,
  file = file.path(path, "artificial.graphml"),
  format = "graphml",
  prefixAttr = FALSE
)

# Export the results to CSV files and import them back.
export.result(result, path = path, label = "artificial", save.rds = FALSE)
result <- import.result(path = path, label = "artificial")
## Not run:
# ####################################
# ########## Alizadeh data ###########
# ####################################
# # Load Alizadeh dataset.
# data(alizadeh)
# showme(alizadeh)
#
# # Fix data types and data values - replace characters such as "," " " "/" etc.
# # from values and column names and fix data types
# # This function may help if mcfs has any problems with input data
# alizadeh <- fix.data(alizadeh)
#
# # Parametrize and run MCFS-ID procedure, projectionSize (m) is set at 5 percent
# # of input columns. For larger data (thousands of features) default settings are good enough.
# # This example may take a few minutes, but it runs on a real dataset.
# result <- mcfs(class~., alizadeh, projections = 2000, projectionSize = 0.05,
# cutoffPermutations = 10, finalCV = TRUE, finalRuleset = TRUE,
# threadsNumber = 4)
#
# # Print basic information about mcfs result.
# print(result)
#
# # Plot & print out distances between subsequent projections.
# plot(result, type="distances")
#
# # Show max RI values from permutation experiment.
# plot(result, type = "ri", size = 500, plot_permutations = TRUE)
#
# # build interdependencies graph.
# gid <- build.idgraph(result, size = 20)
# plot.idgraph(gid, label.dist = 0.3)
#
# # Plot cv classification result obtained on top features.
# # In the middle of x axis red label denotes cutoff_value.
# plot(result, type = "cv", measure = "wacc", cex = 0.8)
# ## End(Not run)
# Run the code above in your browser using DataLab