PeakSegPDPAchrom: PeakSegPDPAchrom

Description

Find the optimal change-points using the Poisson loss and the PeakSeg constraint. This function is a user-friendly interface to the PeakSegPDPA function.

Usage

PeakSegPDPAchrom(count.df, 
    max.peaks = NULL)

Value

List of data.frames: segments can be used for plotting the segmentation model, loss describes model loss and feasibility, modelSelection.feasible describes the set of all linear penalty (lambda) values which can be used to select the feasible models, modelSelection.decreasing selects from all models that decrease the Poisson loss relative to simpler models (same as PeakSegFPOP).

Arguments

count.df: data.frame with columns count, chromStart, chromEnd.
max.peaks: integer > 0: maximum number of peaks.

Author

Toby Dylan Hocking <toby.hocking@r-project.org> [aut, cre]

Examples

Run this code


## samples for which pdpa recovers a more likely model, but it is
## not feasible for the PeakSeg problem (some segment means are
## equal).
sample.id <- "McGill0322"
sample.id <- "McGill0079"
sample.id <- "McGill0106"
n.peaks <- 3
library(PeakSegOptimal)
data("H3K4me3_XJ_immune_chunk1", envir=environment())
H3K4me3_XJ_immune_chunk1$count <- H3K4me3_XJ_immune_chunk1$coverage
by.sample <-
  split(H3K4me3_XJ_immune_chunk1, H3K4me3_XJ_immune_chunk1$sample.id)
one.sample <- by.sample[[sample.id]]
pdpa.fit <- PeakSegPDPAchrom(one.sample, 9L)
pdpa.segs <- subset(pdpa.fit$segments, n.peaks == peaks)
both.segs.list <- list(pdpa=data.frame(pdpa.segs, algorithm="PDPA"))
pdpa.breaks <- subset(pdpa.segs, 1 < first)
pdpa.breaks$feasible <- ifelse(
  diff(pdpa.segs$mean)==0, "infeasible", "feasible")
both.breaks.list <- list(pdpa=data.frame(pdpa.breaks, algorithm="PDPA"))
if(require(PeakSegDP)){
  dp.fit <- PeakSegDP(one.sample, 9L)
  dp.segs <- subset(dp.fit$segments, n.peaks == peaks)
  dp.breaks <- subset(dp.segs, 1 < first)
  dp.breaks$feasible <- "feasible"
  both.segs.list$dp <- data.frame(dp.segs, algorithm="cDPA")
  both.breaks.list$dp <- data.frame(dp.breaks, algorithm="cDPA")
}
both.segs <- do.call(rbind, both.segs.list)
both.breaks <- do.call(rbind, both.breaks.list)
library(ggplot2)
ggplot()+
  theme_bw()+
  theme(panel.margin=grid::unit(0, "lines"))+
  facet_grid(algorithm ~ ., scales="free")+
  geom_step(aes(chromStart/1e3, coverage),
            data=one.sample, color="grey")+
  geom_segment(aes(chromStart/1e3, mean,
                   xend=chromEnd/1e3, yend=mean),
               color="green",
               data=both.segs)+
  scale_linetype_manual(values=c(feasible="dotted", infeasible="solid"))+
  geom_vline(aes(xintercept=chromStart/1e3, linetype=feasible),
             color="green",
             data=both.breaks)

## samples for which pdpa recovers some feasible models that the
## heuristic dp does not.
sample.id.vec <- c(
  "McGill0091", "McGill0107", "McGill0095",
  "McGill0059", "McGill0029", "McGill0010")
sample.id <- sample.id.vec[4]
one.sample <- by.sample[[sample.id]]
pdpa.fit <- PeakSegPDPAchrom(one.sample, 9L)
gg.loss <- ggplot()+
  scale_color_manual(values=c("TRUE"="black", "FALSE"="red"))+
  scale_size_manual(values=c(cDPA=1.5, PDPA=3))+
  scale_fill_manual(values=c(cDPA="white", PDPA="black"))+
  guides(color=guide_legend(override.aes=list(fill="black")))+
  geom_point(aes(peaks, PoissonLoss,
                 size=algorithm, fill=algorithm, color=feasible),
             shape=21,
             data=data.frame(pdpa.fit$loss, algorithm="PDPA"))
if(require(PeakSegDP)){
  dp.fit <- PeakSegDP(one.sample, 9L)
  gg.loss <- gg.loss+
    geom_point(aes(peaks, error,
                   size=algorithm, fill=algorithm),
               shape=21,
               data=data.frame(dp.fit$error, algorithm="cDPA"))
}
gg.loss

diff.df <- data.frame(
  PeakSegPDPA.loss=pdpa.fit$loss$PoissonLoss,
  PeakSegDP.loss=dp.fit$error$error,
  peaks=dp.fit$error$peaks)
ggplot()+
  geom_point(aes(peaks, PeakSegDP.loss - PeakSegPDPA.loss), data=diff.df)

Run the code above in your browser using DataLab