# Must have a column called "genotype", so we'll create a fake one for now
# We will use CV00, which does not require any overlap in genotypes
# In real scenarios, CV schemes that rely on genotypes should not be applied
# when genotypes are unknown, as in this case.
library(magrittr)
# Build the working table from ikeogu.2017: add a placeholder genotype
# column, rename the DMC.oven column to "reference", and keep only the
# identifier columns plus every column whose name starts with "X".
trials <- ikeogu.2017 %>%
  # seq_len() is used instead of 1:nrow() so that a zero-row input yields an
  # empty sequence rather than c(1, 0), which would make mutate() fail.
  dplyr::mutate(genotype = seq_len(nrow(ikeogu.2017))) %>% # fake for this example
  dplyr::rename(reference = DMC.oven) %>%
  dplyr::select(
    study.name, sample.id, genotype, reference,
    tidyselect::starts_with("X")
  )
# Split the combined table by study: keep the rows of one study at a time,
# then drop the study.name column, which is constant within each subset.

# Rows whose study.name is "C16Mcal"
trial1 <- dplyr::select(
  dplyr::filter(trials, study.name == "C16Mcal"),
  -study.name
)

# Rows whose study.name is "C16Mval"
trial2 <- dplyr::select(
  dplyr::filter(trials, study.name == "C16Mval"),
  -study.name
)
# Pass both trials to format_cv() under the "CV00" scheme, with stratified
# sampling turned off and remove.genotype set to TRUE.
cv.list <- format_cv(
  trial1 = trial1,
  trial2 = trial2,
  cv.scheme = "CV00",
  stratified.sampling = FALSE,
  remove.genotype = TRUE
)

# Preview the first five rows and first five columns of each returned set
cv.list$train.set[1:5, 1:5]
cv.list$test.set[1:5, 1:5]
# Run the code above in your browser using DataLab