# Initialize the examples database before any tracks are defined.
gdb.init_examples()

# Virtual tracks referenced by the track expressions further down. The three
# k-mer tracks use the "kmer.frac" function with a single k-mer each, and
# "masked_frac" uses "masked.frac". No source is given (NULL) for any of them.
gvtrack.create(vtrack = "g_frac", src = NULL, func = "kmer.frac", kmer = "G")
gvtrack.create(vtrack = "c_frac", src = NULL, func = "kmer.frac", kmer = "C")
gvtrack.create(vtrack = "cg_frac", src = NULL, func = "kmer.frac", kmer = "CG")
gvtrack.create(vtrack = "masked_frac", src = NULL, func = "masked.frac")
# Repeat mask (regions to preserve from the original genome): screen every
# genome interval for "masked_frac > 0.5" using an iterator of 100.
repeats <- gscreen(
  "masked_frac > 0.5",
  intervals = gintervals.all(),
  iterator = 100
)
# Train the model on two dimensions, passing the repeat intervals as the
# training mask so they are excluded from training.
gc_dim <- list(expr = "g_frac + c_frac", breaks = seq(0, 1, 0.025))
cpg_dim <- list(
  expr = "cg_frac",
  breaks = c(0, 0.01, 0.02, 0.03, 0.04, 0.2)
)
model <- gsynth.train(
  gc_dim,
  cpg_dim,
  mask = repeats,
  iterator = 200,
  min_obs = 1000
)
# Sample a synthetic genome into a FASTA file under the session temp
# directory. The repeat intervals are passed as mask_copy so those regions are
# preserved from the original genome; the seed is fixed.
temp_dir <- tempdir()
synthetic_genome_file <- file.path(temp_dir, "synthetic_genome.fa")

# NOTE(review): appears to hold one merge entry per trained dimension, in the
# order the dimensions were given to gsynth.train - confirm the from/to
# semantics against the gsynth.sample documentation.
merge_rules <- list(
  list(list(from = 0.7, to = c(0.675, 0.7))),
  list(list(from = 0.04, to = c(0.03, 0.04)))
)

gsynth.sample(
  model,
  synthetic_genome_file,
  output_format = "fasta",
  mask_copy = repeats,
  seed = 60427,
  bin_merge = merge_rules
)
# Run the code above in your browser using DataLab