# Point the session at the bundled example database.
gdb.init_examples()

# Virtual tracks used below for stratification: one "kmer.frac" track per
# k-mer of interest, keyed by the virtual track name.
kmer_tracks <- c(g_frac = "G", c_frac = "C", cg_frac = "CG")
for (track_name in names(kmer_tracks)) {
  gvtrack.create(
    track_name, NULL, "kmer.frac",
    kmer = kmer_tracks[[track_name]]
  )
}

# "masked.frac" virtual track; thresholded later to build the repeat mask.
gvtrack.create("masked_frac", NULL, "masked.frac")
# Repeat mask: screen all intervals (iterator = 100) and keep those where
# masked_frac exceeds 0.5.
repeats <- gscreen(
  expr = "masked_frac > 0.5",
  iterator = 100,
  intervals = gintervals.all()
)
# Baseline model: no stratification dimensions are supplied, only the
# repeat mask, the full interval set and the iterator.
model_0d <- gsynth.train(
  iterator = 200,
  intervals = gintervals.all(),
  mask = repeats
)
# First stratification dimension: G + C fraction, with breaks every 0.025
# over [0, 1] and one bin_merge rule (from = 0.7, to = c(0.675, 0.7)).
gc_dim <- list(
  expr = "g_frac + c_frac",
  breaks = seq(0, 1, by = 0.025),
  bin_merge = list(
    list(from = 0.7, to = c(0.675, 0.7))
  )
)

# Second stratification dimension: CG dinucleotide fraction, with explicit
# breaks and one bin_merge rule (from = 0.04, to = c(0.03, 0.04)).
cg_dim <- list(
  expr = "cg_frac",
  breaks = c(0, 0.01, 0.02, 0.03, 0.04, 0.2),
  bin_merge = list(
    list(from = 0.04, to = c(0.03, 0.04))
  )
)

# Train the model with 2D stratification (GC content x CG dinucleotide),
# using the same mask, intervals and iterator as the unstratified model.
model <- gsynth.train(
  gc_dim,
  cg_dim,
  mask = repeats,
  intervals = gintervals.all(),
  iterator = 200
)
# Run the code above in your browser using DataLab