## ----style, echo = FALSE, results = 'asis'------------------------------------
# Emit the BiocStyle markdown set-up for the rendered document.
BiocStyle::markdown()

## ----setup, include=FALSE-----------------------------------------------------
# Chunk option defaults applied to the chunks that follow: figure dpi and
# figure width.
knitr::opts_chunk$set(dpi = 25, fig.width = 7)

## ----env, message = FALSE, warning = FALSE, echo = TRUE-----------------------
library(goSorensen)

## ---- eval=FALSE--------------------------------------------------------------
#  ## Only if BiocManager is not previously installed:
#  install.packages("BiocManager")
#  
#  ## otherwise, directly:
#  BiocManager::install("goSorensen")

## ---- eval=FALSE--------------------------------------------------------------
#  devtools::install_github("pablof1988/goSorensen", build_vignettes = TRUE)

## -----------------------------------------------------------------------------
data("allOncoGeneLists")

## -----------------------------------------------------------------------------
data("humanEntrezIDs")
# Number of gene lists held in allOncoGeneLists:
length(allOncoGeneLists)
# Length of each gene list. vapply() fixes the result to exactly one integer
# per list, whereas the return type of sapply() depends on its input:
vapply(allOncoGeneLists, length, integer(1))
# First 20 gene identifiers of gene lists Vogelstein and sanger:
allOncoGeneLists[["Vogelstein"]][1:20]
allOncoGeneLists[["sanger"]][1:20]
# Enrichment contingency table crossing gene lists Vogelstein and sanger,
# for the MF ontology at GO level 5:
enrichTab <- buildEnrichTable(
  allOncoGeneLists[["Vogelstein"]],
  allOncoGeneLists[["sanger"]],
  listNames = c("Vogelstein", "sanger"),
  onto = "MF",
  GOLevel = 5,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db"
)
enrichTab

# Equivalence test taking 0.2857 as the equivalence (or negligibility) limit:
testResult <- equivTestSorensen(enrichTab, d0 = 0.2857)
testResult

## -----------------------------------------------------------------------------
# Equivalence test with the same settings as above, this time passing the two
# gene lists themselves instead of a previously built contingency table:
equivTestSorensen(
  allOncoGeneLists[["Vogelstein"]],
  allOncoGeneLists[["sanger"]],
  d0 = 0.2857,
  listNames = c("Vogelstein", "sanger"),
  onto = "MF",
  GOLevel = 5,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db"
)

## -----------------------------------------------------------------------------
# Same test on the same contingency table and limit, now requesting the
# bootstrap approach through boot = TRUE.
# NOTE(review): no set.seed() call precedes this chunk; if the bootstrap draws
# from R's random number generator, the printed values may change between
# runs -- confirm whether a fixed seed is wanted here.
boot.testResult <- equivTestSorensen(enrichTab, d0 = 0.2857, boot = TRUE)
boot.testResult

## -----------------------------------------------------------------------------
# Accessor functions: each call extracts one component of the test result
# obtained above without boot = TRUE.
getDissimilarity(testResult)
getSE(testResult)
getPvalue(testResult)
getTable(testResult)
getUpper(testResult)

# In the bootstrap approach, only these differ:
getPvalue(boot.testResult)
getUpper(boot.testResult)
# (Only available for bootstrap tests) effective number of bootstrap resamples:
getNboot(boot.testResult)

## -----------------------------------------------------------------------------
# Dissimilarity computed from the contingency table built earlier:
dSorensen(enrichTab)
# The same computation started from scratch, from both gene lists:
dSorensen(
  allOncoGeneLists[["Vogelstein"]],
  allOncoGeneLists[["sanger"]],
  listNames = c("Vogelstein", "sanger"),
  onto = "MF",
  GOLevel = 5,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db"
)
# The table-based call is the faster of the two: it avoids internally
# building the enrichment contingency table.

# Standard error of the dissimilarity, from the table:
seSorensen(enrichTab)
# ... or from both gene lists:
seSorensen(
  allOncoGeneLists[["Vogelstein"]],
  allOncoGeneLists[["sanger"]],
  listNames = c("Vogelstein", "sanger"),
  onto = "MF",
  GOLevel = 5,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db"
)

# Upper limit of the confidence interval for the true distance, first with
# the default settings, then with an explicit confidence level, then with
# that level and boot = TRUE, and finally from both gene lists:
duppSorensen(enrichTab)
duppSorensen(enrichTab, conf.level = 0.90)
duppSorensen(enrichTab, conf.level = 0.90, boot = TRUE)
duppSorensen(
  allOncoGeneLists[["Vogelstein"]],
  allOncoGeneLists[["sanger"]],
  listNames = c("Vogelstein", "sanger"),
  onto = "MF",
  GOLevel = 5,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db"
)

## -----------------------------------------------------------------------------
# Dissimilarity call receiving the whole list of gene lists at once:
dSorensen(
  allOncoGeneLists,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db",
  onto = "MF",
  GOLevel = 5
)

## -----------------------------------------------------------------------------
# Equivalence test call receiving the whole list of gene lists at once; the
# result is kept so that the accessors below can be applied to it:
allTests <- equivTestSorensen(
  allOncoGeneLists,
  d0 = 0.2857,
  geneUniverse = humanEntrezIDs,
  orgPackg = "org.Hs.eg.db",
  onto = "MF",
  GOLevel = 5
)
getPvalue(allTests)
getDissimilarity(allTests, simplify = FALSE)

## -----------------------------------------------------------------------------
# The four counts of a contingency table, defined once as a plain numeric
# vector and reused to fill a 2 x 2 matrix:
enrichVec <- c(20, 1, 9, 2149)
enrichMat <- matrix(enrichVec, nrow = 2)
enrichMat
# Matrix input:
dSorensen(enrichMat)
# Numeric vector input, without and with boot = TRUE:
equivTestSorensen(enrichVec)
equivTestSorensen(enrichVec, boot = TRUE)

# Numeric vector holding only three counts:
len3Vec <- c(20, 1, 9)
dSorensen(len3Vec)
seSorensen(len3Vec)
duppSorensen(len3Vec)
# Error, bootstrapping requires the full (4 values) contingency table.
# try() with silent = TRUE lets the script continue past the error without
# printing its message:
try(duppSorensen(len3Vec, boot = TRUE), silent = TRUE)

## ----sessionInfo--------------------------------------------------------------
# Print the R version and the attached/loaded packages used for this run.
sessionInfo()