# Example: drop low-representation SNPs from a genotype matrix stored in HDF5,
# using BigDataStatMeth. The `if (FALSE)` guard means nothing below executes
# when this file is sourced; run the body by hand to try it.
if (FALSE) {
  library(BigDataStatMeth)

  # Create test SNP data with missing values: 100 draws from {0, 1, 2, NA}
  # (NA drawn with probability 0.1), arranged as a 10 x 10 matrix.
  snps <- matrix(
    sample(c(0, 1, 2, NA), 100, replace = TRUE, prob = c(0.3, 0.3, 0.3, 0.1)),
    nrow = 10,
    ncol = 10
  )

  # Save to HDF5. The file lives in the session's temporary directory so the
  # example can neither overwrite (overwriteFile = TRUE) nor later delete a
  # pre-existing "snp_data.hdf5" in the user's working directory.
  fn <- tempfile(pattern = "snp_data_", fileext = ".hdf5")
  bdCreate_hdf5_matrix(fn, snps, "genotype", "raw_snps", overwriteFile = TRUE)

  # Remove SNPs with low representation, reading genotype/raw_snps and writing
  # the result to genotype_filtered/filtered_snps in the same file.
  # NOTE(review): pcent = 0.3 is presumably the missing-data threshold and
  # bycols = TRUE presumably applies it per column -- confirm against the
  # BigDataStatMeth documentation.
  bdRemovelowdata_hdf5(
    filename = fn,
    group = "genotype",
    dataset = "raw_snps",
    outgroup = "genotype_filtered",
    outdataset = "filtered_snps",
    pcent = 0.3,
    bycols = TRUE
  )

  # Cleanup: unlink() is a silent no-op when the file is already gone, so no
  # file.exists() guard is needed.
  unlink(fn)
}
# Run the code above in your browser using DataLab