set.seed(seed = 1234556)
data(sim_data)
count_matrix <- sim_data$count_matrix
meta_cell <- sim_data$meta_cell
gene_index <- sim_data$gene_index
meta_ind <- sim_data$meta_ind
obj1 <- DiSC(data.mat = count_matrix, cell.ind = meta_cell,
metadata = meta_ind, outcome = "phenotype",
covariates = "RIN", cell.id = "cell_id",
individual.id = "individual", perm.no = 999,
features = c('prev', 'm', 'nzsd'), verbose = TRUE,
sequencing.data = TRUE)
# Type I error (the nominal level: 0.05)
mean(obj1$p.raw[gene_index$EE_index] <= 0.05)
# True positive rate (based on raw P-values, the higher the better.)
mean(obj1$p.raw[gene_index$mean_index] <= 0.05)
mean(obj1$p.raw[gene_index$var_index] <= 0.05)
mean(obj1$p.raw[gene_index$mean_var_index] <= 0.05)
# False discovery rate (the nominal level: 0.10)
sum(obj1$p.adj.fdr[gene_index$EE_index] <= 0.10)/
sum(obj1$p.adj.fdr <= 0.10)
# True positive rate (based on FDR-adjusted P-values, the higher the better.)
mean(obj1$p.adj.fdr[gene_index$mean_index] <= 0.10)
mean(obj1$p.adj.fdr[gene_index$var_index] <= 0.10)
mean(obj1$p.adj.fdr[gene_index$mean_var_index] <= 0.10)
# By default, DiSC normalizes the scRNA-seq data using TSS (total sum scaling),
# adjusted for log median sequencing depths
# Other user-specified normalization methods can also be used:
# log2 transformed, adjusted for log median sequencing depth
# data_mat_log <- log2(data_mat+1)
# inds <- unique(meta_cell[["individual"]])
# meta_ind <- meta_ind[base::match(inds, meta_ind[["individual"]]), ]
# data_mat <- count_matrix
# depth <- colSums(data_mat)
# cell.list <- list()
# for (ind in inds)
# cell.list[[ind]] <- meta_cell[meta_cell$individual == ind, ][["cell_id"]]
# log_md_depth <- numeric(length = length(inds))
# names(log_md_depth) <- inds
# for(ind in inds)
# log_md_depth[ind] <- log(median(depth[cell.list[[ind]]]))
# meta_ind$log_md_depth <- log_md_depth
# obj_log <-
# DiSC(data.mat = data_mat_log, cell.ind = meta_cell,
# metadata = meta_ind, outcome = "phenotype",
# covariates = c("RIN", "log_md_depth"),
# cell.id = "cell_id", individual.id = "individual",
# perm.no = 999, verbose = FALSE,
# sequencing.data = FALSE, # sequencing.data needs to be FALSE
# features = c('prev', 'm', 'nzsd'))
# Size factor: DESeq2, adjusted for log median sequencing depths
# require(DESeq2)
# colData <- data.frame(condition = rep(meta_ind$phenotype, each = 375),
# row.names = colnames(data_mat))
# dds <- DESeq2::DESeqDataSetFromMatrix(countData = data_mat + 1,
# avoid every gene contains at least one zero
# colData = colData, design = ~ condition)
# dds <- DESeq2::estimateSizeFactors(dds)
# data_mat_des <- sweep(data_mat, 2, DESeq2::sizeFactors(dds), FUN = "/")
# obj_des <-
# DiSC(data.mat = data_mat_des, cell.ind = meta_cell,
# metadata = meta_ind, outcome = "phenotype",
# covariates = c("RIN", "log_md_depth"),
# cell.id = "cell_id", individual.id = "individual",
# perm.no = 999, verbose = FALSE,
# sequencing.data = FALSE, # sequencing.data needs to be FALSE
# features = c('prev', 'm', 'nzsd'))
Run the code above in your browser using DataLab