with all surrogates
maintaining the logFC between treated and control
Let’s say you’re not convinced yet by the package, so you want to be
conservative and remove correlations to all SUCs but not the logFC
between conditions. In this case, set `center_SUCs = "per_sample"`.
Now we can compare the new Pearson correlation coefficients
calculated from the assay `counts_simple_persample` (after
RUCova, upper triangle) to the original coefficients from the assay
`counts` (lower triangle).
# Correlation heatmap: original "counts" assay versus the per-sample
# regressed assay.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_persample"
)

Log fold-changes between irradiated and control condition are kept
(positive means higher in irradiated).
# Per-marker log fold-change: mean asinh-transformed value at 10Gy minus the
# mean at 0Gy, computed on the original "counts" assay.
# NOTE(review): assumes `x` and `m` are character vectors of column names
# defined earlier in the file -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |> # as.tibble() is deprecated in favour of as_tibble()
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  mutate(data = "before RUCova")

# Same computation on the assay "counts_simple_persample".
FC_after <- t(assay(sce_Cal33, "counts_simple_persample")) |>
  as_tibble() |>
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  mutate(data = "simple all, per sample")

# Side-by-side bars of the log fold-changes before and after.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")

changing logFC between samples accordingly
As radiation changes the cell volume, we think differences in protein
intensities between treated and control are confounded. Hence, we want
to remove any difference that correlates with the SUCs
(`center_SUCs = "across_sample"`).
#> [1] "Fitting pH3"
#> [1] "Fitting IdU"
#> [1] "Fitting Cyclin_D1"
#> [1] "Fitting Cyclin_B1"
#> [1] "Fitting Ki.67"
#> [1] "Fitting pRb"
#> [1] "Fitting pH2A.X"
#> [1] "Fitting p.p53"
#> [1] "Fitting p.p38"
#> [1] "Fitting pChk2"
#> [1] "Fitting pCDC25c"
#> [1] "Fitting cCasp3"
#> [1] "Fitting cPARP"
#> [1] "Fitting pAkt"
#> [1] "Fitting pAkt_T308"
#> [1] "Fitting pMEK1.2"
#> [1] "Fitting pERK1.2"
#> [1] "Fitting pS6"
#> [1] "Fitting p4e.BP1"
#> [1] "Fitting pSmad1.8"
#> [1] "Fitting pSmad2.3"
#> [1] "Fitting pNFkB"
#> [1] "Fitting IkBa"
#> [1] "Fitting CXCL1"
#> [1] "Fitting Lamin_B1"
#> [1] "Fitting pStat1"
#> [1] "Fitting pStat3"
#> [1] "Fitting YAP"
#> [1] "Fitting NICD"
# Correlation heatmap: original "counts" assay versus the across-samples
# regressed assay.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_acrosssamples"
)

Log fold-changes between irradiated and control condition are
modified accordingly (positive means higher in irradiated).
# Per-marker log fold-change: mean asinh-transformed value at 10Gy minus the
# mean at 0Gy, computed on the original "counts" assay.
# NOTE(review): assumes `x` and `m` are character vectors of column names
# defined earlier in the file -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |> # as.tibble() is deprecated in favour of as_tibble()
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  mutate(data = "before RUCova")

# Same computation on the assay "counts_simple_acrosssamples".
FC_after <- t(assay(sce_Cal33, "counts_simple_acrosssamples")) |>
  as_tibble() |>
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  # Label fixed: this series comes from the across-samples assay, not the
  # per-sample one.
  mutate(data = "simple all, across samples")

# Side-by-side bars of the log fold-changes before and after.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")

with PC1 only
Let’s imagine you want to be conservative and only remove
correlations between markers and PC1 (of SUCs).
# PCA on the columns named in `x`, each asinh-transformed and then scaled.
# NOTE(review): assumes `x` is a character vector of column names defined
# earlier in the file -- confirm.
pca_cal33 <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |> # as.tibble() is deprecated in favour of as_tibble()
  cbind(sce_Cal33@colData) |>
  select(all_of(x)) |>
  mutate(across(everything(), asinh)) |>
  mutate(across(everything(), scale)) |>
  prcomp()
Calculate and plot the variance explained by each PC:
# Percentage of total variance per principal component (sdev^2 over the sum
# of sdev^2), drawn as labelled bars.
tibble(
  perc = as.numeric(pca_cal33$sdev^2 / sum(pca_cal33$sdev^2)) * 100,
  PC = seq_along(pca_cal33$sdev) # safe replacement for 1:length(...)
) |>
  ggplot(aes(x = PC, y = perc, label = round(perc, 1))) +
  geom_col() +
  geom_label()

Check the loadings of each PC:
# One panel per PC, showing the loading of each row of the rotation matrix.
pca_cal33$rotation |>
  as.data.frame() |>
  rownames_to_column(var = "x") |>
  pivot_longer(cols = -x, names_to = "PC", values_to = "loadings") |>
  ggplot(mapping = aes(x = loadings, y = x)) +
  geom_col() +
  facet_wrap(vars(PC), nrow = 1)
In this example, PC1 has positive loadings, meaning PC1 will positively
correlate with the markers, which is intuitive if we think of it as the
cell size. If PC1 has negative loadings for your data set, you can
simply flip its direction for a more intuitive analysis:
# Flip the sign of PC1; the result is only printed, not stored, as it is
# not necessary here.
mutate(as.data.frame(pca_cal33$x), PC1 = -PC1)
Add the PCA to the sce object under the name “PCA”:
# Store the PCA scores in the sce object under the reduced-dimension name
# "PCA".
name_reduced_dim <- "PCA"
reducedDim(sce_Cal33, name_reduced_dim) <- pca_cal33$x
Then, set `SUCs = "PC1"` and `apply_asinh_SUCs = FALSE`, as the asinh
transformation is not necessary on PCs (it was applied on SUCs before
PCA). This applies to all models.
#> [1] "Fitting pH3"
#> [1] "Fitting IdU"
#> [1] "Fitting Cyclin_D1"
#> [1] "Fitting Cyclin_B1"
#> [1] "Fitting Ki.67"
#> [1] "Fitting pRb"
#> [1] "Fitting pH2A.X"
#> [1] "Fitting p.p53"
#> [1] "Fitting p.p38"
#> [1] "Fitting pChk2"
#> [1] "Fitting pCDC25c"
#> [1] "Fitting cCasp3"
#> [1] "Fitting cPARP"
#> [1] "Fitting pAkt"
#> [1] "Fitting pAkt_T308"
#> [1] "Fitting pMEK1.2"
#> [1] "Fitting pERK1.2"
#> [1] "Fitting pS6"
#> [1] "Fitting p4e.BP1"
#> [1] "Fitting pSmad1.8"
#> [1] "Fitting pSmad2.3"
#> [1] "Fitting pNFkB"
#> [1] "Fitting IkBa"
#> [1] "Fitting CXCL1"
#> [1] "Fitting Lamin_B1"
#> [1] "Fitting pStat1"
#> [1] "Fitting pStat3"
#> [1] "Fitting YAP"
#> [1] "Fitting NICD"
If we regress out any PCs and want to check the correlation
coefficients, it is important that we now pass the name of the reduced
dimension to the heatmap function so it is included:
`name_reduced_dim = "PCA"`.
# Correlation heatmap: original "counts" assay versus the PC1-regressed
# assay, passing the reduced-dimension name "PCA" as well.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_PC1",
  name_reduced_dim = "PCA"
)

Log fold-changes between irradiated and control condition are
modified accordingly (positive means higher in irradiated).
# Per-marker log fold-change: mean asinh-transformed value at 10Gy minus the
# mean at 0Gy, computed on the original "counts" assay.
# NOTE(review): assumes `x` and `m` are character vectors of column names
# defined earlier in the file -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |> # as.tibble() is deprecated in favour of as_tibble()
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  mutate(data = "before RUCova")

# Same computation on the assay "counts_simple_PC1".
FC_after <- t(assay(sce_Cal33, "counts_simple_PC1")) |>
  as_tibble() |>
  cbind(sce_Cal33@colData) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  # Label fixed: this series comes from the PC1 assay, not the
  # "simple all, per sample" one.
  mutate(data = "simple PC1")

# Side-by-side bars of the log fold-changes before and after.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")
