# NOT RUN {
# Example: tune over a grid of settings on the `synthetic` dataset.
library(biclustermd) # provides tune_biclustermd() and the `synthetic` data
library(dplyr)
library(ggplot2)
data("synthetic")

# expand.grid() crosses all of the values below, so each row of `tg` is one
# combination of settings handed to tune_biclustermd() through `tune_grid`.
tg <- expand.grid(
  miss_val = fivenum(synthetic), # five-number summary of the data values
  similarity = c("Rand", "HA", "Jaccard"),
  col_min_num = 2,
  row_min_num = 2,
  col_clusters = 3:5,
  row_clusters = 2
)
tg

# in parallel: two cores:
tbc <- tune_biclustermd(
  synthetic,
  nrep = 2,
  parallel = TRUE,
  ncores = 2,
  tune_grid = tg
)
tbc

# Average `sd_sse` within each (miss_val, col_clusters) pair, then draw one
# line per col_clusters value against miss_val.
tbc$grid %>%
  group_by(miss_val, col_clusters) %>%
  summarise(avg_sd = mean(sd_sse)) %>%
  ungroup() %>% # drop the leftover miss_val grouping before plotting
  ggplot(aes(miss_val, avg_sd, color = col_clusters, group = col_clusters)) +
  geom_line() +
  geom_point()

# Same grid again, this time without the parallel arguments.
tbc <- tune_biclustermd(synthetic, nrep = 2, tune_grid = tg)
tbc

# Compare the distribution of mean and sd of SSE across similarity measures.
boxplot(tbc$grid$mean_sse ~ tbc$grid$similarity)
boxplot(tbc$grid$sd_sse ~ tbc$grid$similarity)
# The next example uses the nycflights13::flights dataset.
# }
# NOT RUN {
# Example: tune on mean arrival delays from nycflights13::flights.
library(biclustermd) # provides tune_biclustermd()
library(nycflights13)
library(dplyr)
library(tidyr) # provides pivot_wider(); it was never attached before
data("flights")

# Mean arrival delay for every (month, dest) pair, reshaped to one row per
# month and one column per destination. Pairs with no flights become NA.
flights_bcd <- flights %>%
  select(month, dest, arr_delay) %>%
  group_by(month, dest) %>%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  ungroup() %>%
  pivot_wider(
    names_from = dest,
    values_from = mean_arr_delay,
    names_sort = TRUE # keep destinations in sorted order, as spread() did
  ) %>%
  as.data.frame()

# months as rows
rownames(flights_bcd) <- flights_bcd$month
# Drop the month column (now held in the row names) and keep a matrix.
flights_bcd <- as.matrix(flights_bcd[, -1])

# Every combination of these settings is one candidate for tuning.
flights_grid <- expand.grid(
  row_clusters = 4,
  col_clusters = c(6, 9, 12),
  miss_val = fivenum(flights_bcd), # five-number summary of the delay matrix
  similarity = c("Rand", "Jaccard")
)

# RUN TIME: approximately 40 seconds across two cores.
flights_tune <- tune_biclustermd(
  flights_bcd,
  nrep = 10,
  parallel = TRUE,
  ncores = 2,
  tune_grid = flights_grid
)
flights_tune
# }
# Run the code above in your browser using DataLab