# NOT RUN {
data(FF4_qc) # load demo dataset
# QC as training samples; QC1, QC2 and QC3 as test samples:
train_samples <- FF4_qc[FF4_qc$sampleType == "QC",]
test_samples <- FF4_qc[FF4_qc$sampleType != "QC",]
# The column given to col_sampleID holds sample labels. You can assign your own names to different samples:
train_samples$sampleID <- "train"
test_samples$sampleID <- "test"
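# Optional sanity check before normalisation (a minimal sketch; it only uses
# the sampleType and plateID columns already referenced below):
table(FF4_qc$sampleType, FF4_qc$plateID)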
# Use the default settings and
# include injection order and well position in the feature set:
test_norm_1 <- run_TIGER(test_samples = test_samples,
                         train_samples = train_samples,
                         col_sampleID = "sampleID",       # input column name
                         col_sampleType = "sampleType",   # input column name
                         col_batchID = "plateID",         # input column name
                         col_order = "injectionOrder",    # input column name
                         col_position = "wellPosition",   # input column name
                         parallel.cores = 2)
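# A quick look at the result (a sketch; it assumes run_TIGER returns the
# normalised test samples as a data frame):
dim(test_norm_1)
head(test_norm_1)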
# If injection order and well position are not available,
# or you do not want to use them:
train_data <- train_samples[-c(4:5)] # remove injectionOrder and wellPosition
test_data <- test_samples[-c(4:5)]   # remove injectionOrder and wellPosition
test_norm_2 <- run_TIGER(test_samples = test_data,
                         train_samples = train_data,
                         col_sampleID = "sampleID",
                         col_sampleType = "sampleType",
                         col_batchID = "plateID",
                         col_order = NULL,     # set NULL
                         col_position = NULL,  # set NULL
                         parallel.cores = 2)
# To use external target values and selected variables with
# customised settings:
target_val <- compute_targetVal(QC_num = train_samples[-c(1:5)],
                                sampleType = train_samples$sampleType,
                                batchID = train_samples$plateID,
                                targetVal_method = "median",
                                targetVal_batchWise = TRUE)
select_var <- select_variable(train_num = train_samples[-c(1:5)],
                              test_num = test_samples[-c(1:5)],
                              train_batchID = train_samples$plateID,
                              test_batchID = test_samples$plateID,
                              selectVar_corType = "pcor",
                              selectVar_corMethod = "spearman",
                              selectVar_minNum = 10,
                              selectVar_maxNum = 30,
                              selectVar_batchWise = TRUE)
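# Optionally inspect the external inputs before passing them to run_TIGER
# (their exact structure depends on the batch-wise settings above, so this
# is just a generic top-level peek):
str(target_val, max.level = 1)
str(select_var, max.level = 1)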
test_norm_3 <- run_TIGER(test_samples = test_samples,
                         train_samples = train_samples,
                         col_sampleID = "sampleID",
                         col_sampleType = "sampleType",
                         col_batchID = "plateID",
                         col_order = "injectionOrder",
                         col_position = "wellPosition",
                         targetVal_external = target_val,
                         selectVar_external = select_var,
                         parallel.cores = 2)
# The definitions of the other hyperparameters correspond to
# randomForest::randomForest().
# If you want to include more hyperparameter values in model training,
# specify them like this:
mtry_percent <- c(0.4, 0.8)
nodesize_percent <- c(0.4, 0.8)
replace <- c(TRUE, FALSE)
ntree <- c(100, 200, 300)
test_norm_4 <- run_TIGER(test_samples = test_data,
                         train_samples = train_data,
                         col_sampleID = "sampleID",
                         col_sampleType = "sampleType",
                         col_batchID = "plateID",
                         mtry_percent = mtry_percent,
                         nodesize_percent = nodesize_percent,
                         replace = replace,
                         ntree = ntree,
                         parallel.cores = 2)
# test_norm_4 is corrected by an ensemble model consisting of base learners
# trained with (around) 24 different hyperparameter combinations:
expand.grid(mtry_percent, nodesize_percent, replace, ntree)
# Note: mtry and nodesize are calculated from mtry_percent and nodesize_percent;
# duplicated hyperparameter combinations, if any, are removed.
# Thus, the total number of hyperparameter combinations can be fewer than 24,
# depending on the dimensions of your input datasets.
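# For reference, the full grid before de-duplication has 2 * 2 * 2 * 3 = 24 rows:
nrow(expand.grid(mtry_percent, nodesize_percent, replace, ntree))  # 24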
# }