####################################
######### Artificial data ##########
####################################
### Set up java parameter and load rmcfs package
options(java.parameters = "-Xmx4g")
library(rmcfs)
# Build the artificial input data set (requesting rnd.features = 10)
# and show a short description of it.
adata <- artificial.data(rnd.features = 10)
info(adata)
# Run MCFS-ID on the artificial data, modelling 'class' against all
# remaining columns; final CV and final ruleset are both switched on.
result <- mcfs(
  class ~ .,
  adata,
  projections = 300,
  projectionSize = 4,
  cutoffPermutations = 5,
  finalCV = TRUE,
  finalRuleset = TRUE,
  threadsNumber = 2
)
# Basic summary of the mcfs result object.
print(result)
# Cutoff values reported for all cutoff methods.
print(result$cutoff)
# The cutoff value that the plots below rely on.
print(result$cutoff_value)
# MCFS-ID convergence statistics: distances between subsequent
# projections, first as a chart and then as raw values.
plot(result, type = "distances")
print(result$distances)
# Relative importance (RI) chart restricted to the 50 top features.
plot(result, type = "ri", size = 50)
# The same chart with the max RI values from the permutation experiment.
plot(result, type = "ri", size = 50, plot_permutations = TRUE)
# First 50 rows of the RI table.
print(head(result$RI, n = 50))
# The 50 strongest feature interdependencies (ID): chart, then table rows.
plot(result, type = "id", size = 50)
print(head(result$ID, n = 50))
# Features ordered by RI_norm. 'size' is the number of top features drawn;
# here it is set slightly above cutoff_value (10 percent more).
plot(result, type = "features", size = result$cutoff_value * 1.1, cex = 1)
# The same chart with 'size' fixed at 10.
plot(result, type = "features", size = 10)
# Cross-validation classification result obtained on the top features
# (measure = "wacc"). The red label in the middle of the x axis marks
# cutoff_value.
plot(result, type = "cv", measure = "wacc", cex = 0.8)
# Plot the confusion matrix. It aggregates all classifications performed
# by all decision trees on all s*t datasets.
plot(result, type = "cmatrix")
# Build the interdependencies graph with all default parameters and plot it.
gid <- build.idgraph(result)
plot(gid)
# Rebuild the graph for the top 6 features and the top 12 interdependencies,
# asking for all nodes to be plotted.
gid <- build.idgraph(result, size = 6, size_ID = 12, plot_all_nodes = TRUE)
plot(gid, label.dist = 1)
# Export the graph to GraphML (XML structure) inside the session temp
# directory. file.path() assembles the path portably, and write_graph()
# is used because igraph::write.graph() is deprecated.
path <- tempdir()
igraph::write_graph(
  gid,
  file = file.path(path, "artificial.graphml"),
  format = "graphml",
  prefixAttr = FALSE
)
# Round-trip the results through CSV files: export them, then import them
# back into 'result' (save.rds = FALSE, so no RDS file is requested).
export.result(result, path = path, label = "artificial", save.rds = FALSE)
result <- import.result(path = path, label = "artificial")
####################################
########## Alizadeh data ###########
####################################
# Load the Alizadeh dataset and show a short description of it.
data(alizadeh)
info(alizadeh)
# Fix data types and data values: replace characters such as "," " " "/"
# in values and column names, and correct the column data types.
# This function may help if mcfs has any problems with the input data.
alizadeh <- fix.data(alizadeh)
# Parametrize and run the MCFS-ID procedure. projectionSize (m) is set to
# 5 percent of the input columns. For larger data (thousands of features)
# the default settings are good enough.
# This example may take a few minutes, as it runs on a real dataset.
# NOTE(review): threadsNumber = 8 is hard-coded; lower it on machines with
# fewer cores.
result <- mcfs(
  class ~ .,
  alizadeh,
  projections = 3000,
  projectionSize = 0.05,
  cutoffPermutations = 20,
  threadsNumber = 8
)
# Print basic information about the mcfs result.
print(result)