## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(fig.width=5, fig.height=5)

## ---- echo=FALSE--------------------------------------------------------------
knitr::include_graphics("images/bootRanges.jpg")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(ExperimentHub))
eh = ExperimentHub()
# query(eh, "nullrangesdata")
seg_cbs <- eh[["EH7307"]]
seg_hmm <- eh[["EH7308"]]
seg <- seg_cbs

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(ensembldb))
suppressPackageStartupMessages(library(EnsDb.Hsapiens.v86))
edb <- EnsDb.Hsapiens.v86
filt <- AnnotationFilterList(GeneIdFilter("ENSG", "startsWith"))
g <- genes(edb, filter = filt)

## -----------------------------------------------------------------------------
library(GenomeInfoDb)
g <- keepStandardChromosomes(g, pruning.mode = "coarse")
seqlevels(g, pruning.mode="coarse") <- setdiff(seqlevels(g), "MT")
# normally we would assign a new style, but for recent host issues
## seqlevelsStyle(g) <- "UCSC" 
seqlevels(g) <- paste0("chr", seqlevels(g))
genome(g) <- "hg38"
g <- sortSeqlevels(g)
g <- sort(g)
table(seqnames(g))

## -----------------------------------------------------------------------------
# suppressPackageStartupMessages(library(AnnotationHub))
exclude <- eh[["EH7306"]]
all(seqlengths(g) == seqlengths(exclude))

## -----------------------------------------------------------------------------
library(nullranges)
suppressPackageStartupMessages(library(plyranges))
library(patchwork)

## ----seg-cbs------------------------------------------------------------------
set.seed(5)
exclude <- exclude %>%
  plyranges::filter(width(exclude) >= 500)
L_s <- 1e6
seg_cbs <- segmentDensity(g, n = 3, L_s = L_s,
                          exclude = exclude, type = "cbs")
plots <- lapply(c("ranges","barplot","boxplot"), function(t) {
  plotSegment(seg_cbs, exclude, type = t)
})
plots[[1]]
plots[[2]] + plots[[3]]

## -----------------------------------------------------------------------------
region <- GRanges("chr16", IRanges(3e7,4e7))
plotSegment(seg_cbs, exclude, type="ranges", region=region)

## ----seg-hmm------------------------------------------------------------------
seg_hmm <- segmentDensity(g, n = 3, L_s = L_s,
                          exclude = exclude, type = "hmm")
plots <- lapply(c("ranges","barplot","boxplot"), function(t) {
  plotSegment(seg_hmm, exclude, type = t)
})
plots[[1]]
plots[[2]] + plots[[3]]

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(nullrangesData))
dhs <- DHSA549Hg38()
dhs <- dhs %>% plyranges::filter(signalValue > 100) %>%
  mutate(id = seq_along(.)) %>%
  plyranges::select(id)
length(dhs)
table(seqnames(dhs))

## -----------------------------------------------------------------------------
set.seed(5) # for reproducibility
R <- 50
blockLength <- 5e5
boots <- bootRanges(dhs, blockLength, R = R, seg = seg, exclude=exclude)
boots

## -----------------------------------------------------------------------------
boots %>% group_by(iter) %>%
  summarize(
    n = n(),
    total_width = sum(width)
  )

## -----------------------------------------------------------------------------
dhs_filt <- dhs %>% filter_by_non_overlaps(exclude)
dhs_filt %>% summarize(n = n(), total_width=sum(width))

## -----------------------------------------------------------------------------
x <- GRanges("chr2", IRanges(1 + 50:99 * 1e6, width=1e6), x_id=1:50)
x <- x %>% mutate(n_overlaps = count_overlaps(., dhs_filt))
mean( x$n_overlaps )

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(tidyr))

## -----------------------------------------------------------------------------
boot_stats <- x %>% join_overlap_inner(boots) %>%
  group_by(x_id, iter) %>%
  summarize(n_overlaps = n()) %>%
  as.data.frame() %>%
  complete(x_id, iter, fill=list(n_overlaps = 0)) %>%
  group_by(iter) %>%
  summarize(meanOverlaps = mean(n_overlaps))

## ----boot-hist----------------------------------------------------------------
suppressPackageStartupMessages(library(ggplot2))
ggplot(boot_stats, aes(meanOverlaps)) +
  geom_histogram(binwidth=.2)

## -----------------------------------------------------------------------------
library(microbenchmark)
microbenchmark(
  list=alist(
    prop = bootRanges(dhs, blockLength, seg = seg, proportionLength = TRUE),
    no_prop = bootRanges(dhs, blockLength, seg = seg, proportionLength = FALSE)
), times=10)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(plotgardener))
my_palette <- function(n) {
  head(c("red","green3","red3","dodgerblue",
         "blue2","green4","darkred"), n)
}
plotGRanges <- function(gr) {
  pageCreate(width = 5, height = 5, xgrid = 0,
                ygrid = 0, showGuides = TRUE)
  for (i in seq_along(seqlevels(gr))) {
    chrom <- seqlevels(gr)[i]
    chromend <- seqlengths(gr)[[chrom]]
    suppressMessages({
      p <- pgParams(chromstart = 0, chromend = chromend,
                    x = 0.5, width = 4*chromend/500, height = 2,
                    at = seq(0, chromend, 50),
                    fill = colorby("state_col", palette=my_palette))
      prngs <- plotRanges(data = gr, params = p,
                          chrom = chrom,
                          y = 2 * i,
                          just = c("left", "bottom"))
      annoGenomeLabel(plot = prngs, params = p, y = 0.1 + 2 * i)
    })
  }
}

## -----------------------------------------------------------------------------
library(GenomicRanges)
seq_nms <- rep(c("chr1","chr2"), c(4,3))
seg <- GRanges(
  seqnames = seq_nms,
  IRanges(start = c(1, 101, 201, 401, 1, 201, 301),
          width = c(100, 100, 200, 100, 200, 100, 100)),
  seqlengths=c(chr1=500,chr2=400),
  state = c(1,2,1,3,3,2,1),
  state_col = factor(1:7)
)

## ----toysegments--------------------------------------------------------------
plotGRanges(seg)

## ----toyranges----------------------------------------------------------------
set.seed(1)
n <- 200
gr <- GRanges(
  seqnames=sort(sample(c("chr1","chr2"), n, TRUE)),
  IRanges(start=round(runif(n, 1, 500-10+1)), width=10)
)
suppressWarnings({
  seqlengths(gr) <- seqlengths(seg)
})
gr <- gr[!(seqnames(gr) == "chr2" & end(gr) > 400)]
gr <- sort(gr)
idx <- findOverlaps(gr, seg, type="within", select="first")
gr <- gr[!is.na(idx)]
idx <- idx[!is.na(idx)]
gr$state <- seg$state[idx]
gr$state_col <- factor(seg$state_col[idx])
plotGRanges(gr)

## ----toy-no-prop--------------------------------------------------------------
set.seed(1)
gr_prime <- bootRanges(gr, blockLength = 25, seg = seg,
                       proportionLength = FALSE)
plotGRanges(gr_prime)

## ----toy-prop-----------------------------------------------------------------
set.seed(1)
gr_prime <- bootRanges(gr, blockLength = 50, seg = seg,
                       proportionLength = TRUE)
plotGRanges(gr_prime)

## -----------------------------------------------------------------------------
sessionInfo()