## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----installation, eval=FALSE-------------------------------------------------
#  
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("SpatialDecon")
#  

## ----setup--------------------------------------------------------------------
library(SpatialDecon)
library(SeuratObject)

## ----loaddata-----------------------------------------------------------------
# download data:
con <- gzcon(url("https://github.com/almaan/her2st/raw/master/data/ST-cnts/G1.tsv.gz"))
txt <- readLines(con)
temp <- read.table(textConnection(txt), sep = "\t", header = TRUE, row.names = 1)
# parse data
raw = t(as.matrix(temp))
norm = sweep(raw, 2, colSums(raw), "/") * mean(colSums(raw))
x = as.numeric(substr(rownames(temp), 1, unlist(gregexpr("x", rownames(temp))) - 1))
y = -as.numeric(substr(rownames(temp), unlist(gregexpr("x", rownames(temp))) + 1, nchar(rownames(temp))))
# put into a seurat object:
andersson_g1 = CreateSeuratObject(counts = raw, assay="Spatial")
andersson_g1@meta.data$x = x
andersson_g1@meta.data$y = y


## ----showsafetme, fig.height=5, fig.width=10, fig.cap = "The safeTME cell profile matrix"----
data("safeTME")
data("safeTME.matches")

signif(safeTME[seq_len(3), seq_len(3)], 2)

heatmap(sweep(safeTME, 1, apply(safeTME, 1, max), "/"),
        labRow = NA, margins = c(10, 5))


## ----downloadmatrix, fig.height=7, fig.width=10, fig.cap = "The Mouse Spleen profile matrix", eval=T----
mousespleen <- download_profile_matrix(species = "Mouse",
                                       age_group = "Adult", 
                                       matrixname = "Spleen_MCA")
dim(mousespleen)

mousespleen[1:4,1:4]

head(cellGroups)

metadata

heatmap(sweep(mousespleen, 1, apply(mousespleen, 1, max), "/"),
        labRow = NA, margins = c(10, 5), cexCol = 0.7)


## ----single cell data---------------------------------------------------------
data("mini_singleCell_dataset")

mini_singleCell_dataset$mtx@Dim # genes x cells

as.matrix(mini_singleCell_dataset$mtx)[1:4,1:4]

head(mini_singleCell_dataset$annots)

table(mini_singleCell_dataset$annots$LabeledCellType)


## ----creatematrix, fig.height=7, fig.width=10, fig.cap = "Custom profile matrix"----
custom_mtx <- create_profile_matrix(mtx = mini_singleCell_dataset$mtx,            # cell x gene count matrix
                                    cellAnnots = mini_singleCell_dataset$annots,  # cell annotations with cell type and cell name as columns 
                                    cellTypeCol = "LabeledCellType",  # column containing cell type
                                    cellNameCol = "CellID",           # column containing cell ID/name
                                    matrixName = "custom_mini_colon", # name of final profile matrix
                                    outDir = NULL,                    # path to desired output directory, set to NULL if matrix should not be written
                                    normalize = FALSE,                # Should data be normalized? 
                                    minCellNum = 5,                   # minimum number of cells of one type needed to create profile, exclusive
                                    minGenes = 10,                    # minimum number of genes expressed in a cell, exclusive
                                    scalingFactor = 5,                # what should all values be multiplied by for final matrix
                                    discardCellTypes = TRUE)          # should cell types be filtered for types like mitotic, doublet, low quality, unknown, etc.

head(custom_mtx)

heatmap(sweep(custom_mtx, 1, apply(custom_mtx, 1, max), "/"),
        labRow = NA, margins = c(10, 5), cexCol = 0.7)


## ----createSeuratmatrix-------------------------------------------------------
library(SeuratObject)

data("mini_singleCell_dataset")

rownames(mini_singleCell_dataset$annots) <- mini_singleCell_dataset$annots$CellID

seuratObject <- CreateSeuratObject(counts = mini_singleCell_dataset$mtx, meta.data = mini_singleCell_dataset$annots)
Idents(seuratObject) <- seuratObject$LabeledCellType

rm(mini_singleCell_dataset)

annots <- data.frame(cbind(cellType=as.character(Idents(seuratObject)), 
                           cellID=names(Idents(seuratObject))))

custom_mtx_seurat <- create_profile_matrix(mtx = seuratObject@assays$RNA@counts, 
                                           cellAnnots = annots, 
                                           cellTypeCol = "cellType", 
                                           cellNameCol = "cellID", 
                                           matrixName = "custom_mini_colon",
                                           outDir = NULL, 
                                           normalize = FALSE, 
                                           minCellNum = 5, 
                                           minGenes = 10)

head(custom_mtx_seurat)

paste("custom_mtx and custom_mtx_seurat are identical", all(custom_mtx == custom_mtx_seurat))

## ----runiss-------------------------------------------------------------------
res = runspatialdecon(object = andersson_g1,
                      bg = 0.01,
                      X = safeTME,
                      align_genes = TRUE)
str(res)

## ----plotissres, fig.height = 5, fig.width = 8, fig.cap = "Cell abundance estimates"----
heatmap(res$beta, cexCol = 0.5, cexRow = 0.7, margins = c(10,7))

## ----showmatches--------------------------------------------------------------
str(safeTME.matches)

## ----puretumor----------------------------------------------------------------
sharedgenes = intersect(rownames(raw), rownames(safeTME))
plot(colSums(raw), colSums(raw[sharedgenes, ]), log = "xy")
hist(colSums(raw[sharedgenes, ]) / colSums(raw), breaks = 20)
alltumor = colSums(raw[sharedgenes, ]) / colSums(raw) < 0.03  # for alma data

table(alltumor)


## ----wts----------------------------------------------------------------------
sd_from_noise <- runErrorModel(counts = raw, platform = "st") 
wts <- 1 / sd_from_noise

## ----runisstils---------------------------------------------------------------

# run spatialdecon with all the bells and whistles:
restils = runspatialdecon(object = andersson_g1,           # Seurat object 
                          X = safeTME,                     # safeTME matrix, used by default
                          bg = 0.01,                       # Recommended value of 0.01 in Visium/ST data
                          wts = wts,                       # weight
                          cellmerges = safeTME.matches,    # safeTME.matches object, used by default
                          is_pure_tumor = alltumor,        # identities of the Tumor segments/observations
                          n_tumor_clusters = 5)            # how many distinct tumor profiles to append to safeTME

str(restils)

## ----shownewX, fig.height=5, fig.width=8, fig.cap = "safeTME merged with newly-derived tumor profiles"----
heatmap(sweep(restils$X, 1, apply(restils$X, 1, max), "/"),
         labRow = NA, margins = c(10, 5))


## ----florets, fig.width=8, fig.height=6, fig.cap = "TIL abundance plotted on PC space"----
# PCA of the normalized data:
pc = prcomp(t(log2(pmax(norm, 1))))$x[, c(1, 2)]

# run florets function:
par(mar = c(5,5,1,1))
layout(mat = (matrix(c(1, 2), 1)), widths = c(6, 2))
florets(x = pc[, 1], y = pc[, 2],
        b = restils$beta, cex = .5,
        xlab = "PC1", ylab = "PC2")
par(mar = c(0,0,0,0))
frame()
legend("center", fill = cellcols[rownames(restils$beta)], 
       legend = rownames(restils$beta), cex = 0.7)

## ----floretsxy, fig.width=8, fig.height=6, fig.cap = "TIL abundance plotted on physical space"----
# and plot florets in space:
par(mar = c(5,5,1,1))
layout(mat = (matrix(c(1, 2), 1)), widths = c(6, 2))
florets(x = x, y = y,
        b = restils$beta, cex = .5,
        xlab = "", ylab = "")
par(mar = c(0,0,0,0))
frame()
legend("center", fill = cellcols[rownames(restils$beta)], 
       legend = rownames(restils$beta), cex = 0.7)



## ----collapse, fig.width=5, fig.height=5, fig.cap="Cell abundance estimates with related cell types collapsed"----
matching = list()
matching$myeloid = c( "macrophages", "monocytes", "mDCs")
matching$T.NK = c("CD4.T.cells","CD8.T.cells", "Treg", "NK")
matching$B = c("B")
matching$mast = c("mast")
matching$neutrophils = c("neutrophils")
matching$stroma = c("endothelial.cells", "fibroblasts")


collapsed = collapseCellTypes(fit = restils, 
                              matching = matching)

heatmap(collapsed$beta, cexRow = 0.85, cexCol = 0.75)

## ----sessioninfo--------------------------------------------------------------
sessionInfo()