## Simulate Data ##
# DATA: 10000x50
# BC1: 200x10
# BC2: 100x10
# BC1 and BC2 overlap 5 columns
# BC3: 200x10
# BC4: 100x10
# BC3 and bC4 overlap 2 columns
# Background 1 percentage: 0.15
# BC Signal Percentage: 0.9
set.seed(273)
mat <- matrix(sample(c(0,1),10000*50,replace=TRUE,prob=c(1-0.15,0.15)),
nrow=10000,ncol=50)
mat[1:200,1:10] <- matrix(sample(c(0,1),200*10,replace=TRUE,prob=c(1-0.9,0.9)),
nrow=200,ncol=10)
mat[300:399,6:15] <- matrix(sample(c(0,1),100*10,replace=TRUE,prob=c(1-0.9,0.9)),
nrow=100,ncol=10)
mat[400:599,21:30] <- matrix(sample(c(0,1),200*10,replace=TRUE,prob=c(1-0.9,0.9)),
nrow=200,ncol=10)
mat[700:799,29:38] <- matrix(sample(c(0,1),100*10,replace=TRUE,prob=c(1-0.9,0.9)),
nrow=100,ncol=10)
mat <- mat[sample(1:10000,10000,replace=FALSE),sample(1:50,50,replace=FALSE)]
# Computing gap statistic for initial 1381 BC takes approx. 15 min.
# Gap Statistic chooses 4 clusters.
out <- BiBitWorkflow(matrix=mat,minr=50,minc=5,noise=0.2)
summary(out$Biclust)
# Reduce computation by selecting number of clusters manually.
# Note: The "ClusterRowCoverage" function can be used to provided extra info
# on the number of cluster choice.
# How?
# - More clusters result in smaller column patterns and more matching rows.
# - Less clusters result in larger column patterns and less matching rows.
# Step 1: Initial Workflow Run
out2 <- BiBitWorkflow(matrix=mat,minr=50,minc=5,noise=0.2,cut_type="number",cut_pm=10)
# Step 2: Use ClusterRowCoverage
temp <- ClusterRowCoverage(result=out2,matrix=mat,noise=0.2,plots=2)
# Step 3: Use BiBitWorkflow again (using previously computed parts) with new cut parameter
out3 <- BiBitWorkflow(matrix=mat,minr=50,minc=5,noise=0.2,cut_type="number",cut_pm=4,
BCresult = out2$info$BiclustInitial,
simmatresult = out2$info$BiclustSimInitial)
summary(out3$Biclust)
Run the code above in your browser using DataLab