## Toy example: an artificial genome consisting of two short chromosomes
genome <- DNAStringSet(c(
  "AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTT",
  "GGGGGGGGGGGGGGGGGGGGCCCCCCCCCCCCCCCCCCCC"
))
## Name the sequences so they can be addressed as chromosomes
names(genome) <- c("chr1", "chr2")
#######################
## Example 1: Deletions
## Simulate 5 deletions with a length of 5bp each (fixed seed)
sim <- simulateSV(
  output = NA, genome = genome, dels = 5, sizeDels = 5,
  bpSeqSize = 10, seed = 246
)
## The simulated deletions are kept in the metadata of the result
simSVs <- metadata(sim)$deletions
## A hypothetical SV detection result in BED format:
## - four of the five simulated deletions were found, two of them with exact
##   and two with approximate breakpoints
## - two further deletions were reported that were never simulated
## The breakpoint sequence column is optional and the column names do not
## matter (BED files carry no header)
querySVs <- data.frame(
  chr = rep(c("chr1", "chr2"), each = 3),
  start = c(4, 12, 20, 10, 21, 34),
  end = c(8, 16, 28, 14, 31, 38),
  bpSeq = c(
    "AAAAAAAAAA", "AAAAAAAAAT", "AAAATTTTTT",
    "GGGGGGGGGG", "GGGGGGGCCC", "CCCCCCCCCC"
  )
)
## Comparison with a breakpoint tolerance of 0bp:
## only the two exactly detected deletions overlap
simSVs_overlap1 <- compareSV(querySVs, simSVs, tol = 0)
simSVs_overlap1
## Comparison with the breakpoint tolerance raised to +/- 3bp:
## the imprecise detections now overlap as well, which improves
## both sensitivity and precision
## Note: for deletion2 the breakpoint sequence matches by only 50%
simSVs_overlap2 <- compareSV(querySVs, simSVs, tol = 3)
simSVs_overlap2
############################
## Example 2: Translocations
## Simulate 2 translocations, only one of which is balanced
sim <- simulateSV(
  output = NA, genome = genome, trans = 2, percBalancedTrans = 0.5,
  bpSeqSize = 10, seed = 246
)
## The simulated translocations are kept in the metadata of the result
simSVs <- metadata(sim)$translocations
## Detected translocations must be supplied in BEDPE format, i.e. with at
## least six columns: chr, start, end for the breakpoints on both chromosomes
## Here the breakpoints are only approximated (off by up to 1 or 2bp)
## and the optional breakpoint sequences are left out
querySVs <- data.frame(
  chr = c("chr2", "chr1", "chr2"),
  start1 = c(25, 3, 9),
  end1 = c(29, 7, 12),
  chr2 = c("chr1", "chr2", "chr1"),
  start2 = c(22, 10, 3),
  end2 = c(25, 13, 4)
)
## Compare the detected translocations with the simulated ones,
## using a breakpoint tolerance of 0bp
simSVs_overlap <- compareSV(querySVs, simSVs, tol = 0)
## Print the comparison result
simSVs_overlap