## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----installation, eval=FALSE--------------------------------------------
#  # Install from Github
#  install.packages('devtools')
#  library('devtools')
#  install_github('langholee/XINA')
#  
#  # Install from Bioconductor
#  install.packages("BiocManager")
#  BiocManager::install("XINA")

## ----import libraries----------------------------------------------------
library(XINA)
library(igraph)
library(ggplot2)
library(STRINGdb)

## ----example random dataset----------------------------------------------
# Generate random multiplexed time-series data
random_data_info <- make_random_xina_data()

# The number of proteins
random_data_info$size

# Time points
random_data_info$time_points

# Three conditions
random_data_info$conditions

## ----check randomly generated data files---------------------------------
Control <- read.csv("Control.csv", check.names=FALSE, stringsAsFactors = FALSE)
Stimulus1 <- read.csv("Stimulus1.csv", check.names=FALSE, stringsAsFactors = FALSE)
Stimulus2 <- read.csv("Stimulus2.csv", check.names=FALSE, stringsAsFactors = FALSE)

head(Control)
head(Stimulus1)
head(Stimulus2)

## ----data matrix---------------------------------------------------------
head(Control[random_data_info$time_points])

## ----fix random seed-----------------------------------------------------
set.seed(0)

## ----set up for the clustering-------------------------------------------
# Data files
data_files <- paste(random_data_info$conditions, ".csv", sep='')
data_files

# time points of the data matrix
data_column <- random_data_info$time_points
data_column

## ----XINA clustering-----------------------------------------------------
# Run the model-based clusteirng
clustering_result <- xina_clustering(data_files, data_column=data_column, nClusters=30)

## ----XINA clustering with VVI covariance matrix, eval=FALSE--------------
#  # Model-based clustering using VVI covariance model
#  clustering_result_vvi <- xina_clustering(data_files, data_column=data_column, nClusters=30, chosen_model='VVI')

## ----XINA with k-means clustering----------------------------------------
clustering_result_km <- xina_clustering(data_files, data_column=data_column, nClusters=30, chosen_model='kmeans')

## ----load previous data--------------------------------------------------
clustering_result_reloaded <- load_previous_results(".")
head(clustering_result_reloaded$aligned)

## ----load xina_example---------------------------------------------------
data(xina_example)

## ----XINA clustering plot------------------------------------------------
plot_clusters(example_clusters, xylab=FALSE)

## ----clustering plot2, eval=FALSE----------------------------------------
#  plot_clusters(example_clusters, xval=c(0,2,6,12,24,48,72), xylab=FALSE)
#  
#  # You can change Y axis limits to be ranged from 0 to 0.35
#  plot_clusters(example_clusters, y_lim=c(0,0.35), xval=c(0,2,6,12,24,48,72), xylab=FALSE)
#  
#  # If you need, you can modify the clustering plot by creating your own ggplot theme.
#  theme1 <- theme(title=element_text(size=8, face='bold'),
#                  axis.text.x = element_text(size=7),
#                  axis.text.y = element_blank(),
#                  axis.ticks.x = element_blank(),
#                  axis.ticks.y = element_blank(),
#                  axis.title.x = element_blank(),
#                  axis.title.y = element_blank())
#  plot_clusters(example_clusters, ggplot_theme=theme1)

## ----XINA condition composition------------------------------------------
condition_composition <- plot_condition_compositions(example_clusters)
tail(condition_composition)

## ----XINA condition composition2, eval=FALSE-----------------------------
#  theme2 <- theme(legend.position="none", title=element_text(size=7, face='bold'))
#  condition_composition <- plot_condition_compositions(example_clusters, ggplot_theme=theme2)
#  
#  # Or you can adjust the legend size.
#  theme3 <- theme(legend.key.size = unit(0.3, "cm"),
#                  legend.text=element_text(size=5),
#                  title=element_text(size=7, face='bold'))
#  condition_composition <- plot_condition_compositions(example_clusters, ggplot_theme=theme3)
#  
#  # You can utilize your own colors for condition composition charts.
#  # Make a new color code for conditions
#  condition_colors <- c("tomato","steelblue1","gold")
#  names(condition_colors) <- example_clusters$condition
#  example_clusters$color_for_condition <- condition_colors
#  
#  # Draw condition composition pie-chart with the new color code
#  condition_composition <- plot_condition_compositions(example_clusters, ggplot_theme=theme3)
#  
#  # XINA also can draw bull's-eye plots instead of pie-charts.
#  condition_composition <- plot_condition_compositions(example_clusters, ggplot_theme=theme3, bullseye = TRUE)

## ----clustering plot with condition colors-------------------------------
# New colors for the clustering plot based on condition composition
example_clusters$color_for_clusters <- mutate_colors(condition_composition, example_clusters$color_for_condition)
example_clusters$color_for_clusters

## ----drawing clustering plot with the mutated colors, eval=FALSE---------
#  plot_clusters(example_clusters, xval=c(0,2,6,12,24,48,72), xylab=FALSE)

## ----clustering plot with lowered threshold, eval=FALSE------------------
#  example_clusters$color_for_clusters <- mutate_colors(condition_composition, example_clusters$color_for_condition, threshold_percent=40)
#  plot_clusters(example_clusters, xval=c(0,2,6,12,24,48,72), xylab=FALSE)

## ----XINA comigration search---------------------------------------------
classes <- as.vector(example_clusters$condition)
classes

all_cor <- alluvial_enriched(example_clusters, classes)
head(all_cor)

## ----XINA comigration search with different order, eval=FALSE------------
#  all_cor_Stimulus1_start <- alluvial_enriched(example_clusters, c(classes[2],classes[1],classes[3]))
#  head(all_cor_Stimulus1_start)

## ----XINA comigration search with comigration size filter, eval=FALSE----
#  cor_bigger_than_10 <- alluvial_enriched(example_clusters, classes, comigration_size=10)
#  head(cor_bigger_than_10)

## ----finding the condition-biased clusters, eval=FALSE-------------------
#  condition_biased_comigrations <- get_condition_biased_comigrations(clustering_result=example_clusters,
#                                                                     count_table=cor_bigger_than_10,
#                                                                     selected_conditions=classes,
#                                                                     condition_composition=condition_composition,
#                                                                     threshold_percent=50, color_for_null='gray',
#                                                                     color_for_highly_matched='yellow', cex = 1)

## ----protein list of specific comigrations-------------------------------
condition_biased_clusters <- condition_composition[condition_composition$Percent_ratio>=50,]
control_biased_clusters <- condition_biased_clusters[condition_biased_clusters$Condition=="Control",]$Cluster
Stimulus1_biased_clusters <- condition_biased_clusters[condition_biased_clusters$Condition=="Stimulus1",]$Cluster
Stimulus2_biased_clusters <- condition_biased_clusters[condition_biased_clusters$Condition=="Stimulus2",]$Cluster

# Get the proteins showing condition-specific expression patterns in three conditions
proteins_found_in_condition_biased_clusters <- subset(example_clusters$aligned, Control==control_biased_clusters[1] & Stimulus1==Stimulus1_biased_clusters[1] & Stimulus2==Stimulus2_biased_clusters[1])
nrow(proteins_found_in_condition_biased_clusters)
head(proteins_found_in_condition_biased_clusters)

## ----comigrations with comigration size and significance filters---------
Control_Stimulus1_significant <- alluvial_enriched(example_clusters, c("Control","Stimulus1"), comigration_size=5, pval_threshold=0.05)
head(Control_Stimulus1_significant)

## ----STRING DB set up----------------------------------------------------
string_db <- STRINGdb$new( version="10", species=9606, score_threshold=0, input_directory="" )
string_db

## ----select PPI confidence level of STRING DB, eval=FALSE----------------
#  get.edge.attribute(sub_graph)$combined_score
#  
#  # Medium confidence PPIs only
#  string_db_med_confidence <- subgraph.edges(string_db$graph, which(E(string_db$graph)$combined_score>=400), delete.vertices = TRUE)
#  
#  # High confidence PPIs only
#  string_db_high_confidence <- subgraph.edges(string_db$graph, which(E(string_db$graph)$combined_score>=700), delete.vertices = TRUE)

## ----XINA analysis with STRING DB----------------------------------------
xina_result <- xina_analysis(example_clusters, string_db)

## ----HPRD PPI network----------------------------------------------------
# Construct HPRD PPI network
data(HPRD)
ppi_db_hprd <- simplify(graph_from_data_frame(d=hprd_ppi, directed=FALSE), remove.multiple = FALSE, remove.loops = TRUE)
head(hprd_ppi)

## ----XINA analysis with HPRD---------------------------------------------
# Run XINA with HPRD protein-protein interaction database
xina_result_hprd <- xina_analysis(example_clusters, ppi_db_hprd, is_stringdb=FALSE)

## ----plotting PPI networks of all the clusters, eval=FALSE---------------
#  # XINA network plots labeled gene names
#  xina_plots(xina_result, example_clusters)

## ----xina_plots without labels, eval=FALSE-------------------------------
#  # XINA network plots without labels
#  xina_plots(xina_result, example_clusters, vertex_label_flag=FALSE)

## ----xina_plots with tree layout, eval=FALSE-----------------------------
#  # Plot PPI networks with tree layout
#  xina_plots(xina_result, example_clusters, vertex_label_flag=FALSE, layout_specified='tree')

## ----xina_plot_bycluster-------------------------------------------------
xina_plot_bycluster(xina_result, example_clusters, cl=1)

## ----xina_plot_bycluster2, eval=FALSE------------------------------------
#  xina_plot_bycluster(xina_result, example_clusters, cl=1, condition="Control")

## ----xina_plot_all-------------------------------------------------------
img_size <- c(3000,3000)
xina_plot_all(xina_result, example_clusters, img_size=img_size)

## ----xina_plot_all2, eval=FALSE------------------------------------------
#  xina_plot_all(xina_result, example_clusters, condition="Control", img_size=img_size)
#  xina_plot_all(xina_result, example_clusters, condition="Stimulus1", img_size=img_size)
#  xina_plot_all(xina_result, example_clusters, condition="Stimulus2", img_size=img_size)

## ----xina_plot_all_eigen, eval=FALSE-------------------------------------
#  xina_plot_all(xina_result, example_clusters, centrality_type="Eigenvector",
#                edge.color = 'gray', img_size=img_size)
#  xina_plot_all(xina_result, example_clusters, condition="Control", centrality_type="Eigenvector",
#                edge.color = 'gray', img_size=img_size)
#  xina_plot_all(xina_result, example_clusters, condition="Stimulus1", centrality_type="Eigenvector",
#                edge.color = 'gray', img_size=img_size)
#  xina_plot_all(xina_result, example_clusters, condition="Stimulus2", centrality_type="Eigenvector",
#                edge.color = 'gray', img_size=img_size)

## ----enrichment tests, eval=FALSE----------------------------------------
#  enriched_fdr_0.05 <- xina_enrichment(xina_result, example_clusters, string_db, pval_threshold=0.05)

## ----network analysis of comigrated proteins-----------------------------
protein_list <- proteins_found_in_condition_biased_clusters$`Gene name`
protein_list

plot_title_ppi <- "PPI network of the proteins found in the condition-biased clusters"

# Draw PPI networks and compute eigenvector centrality score.
comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
                                                           vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
                                                           main=plot_title_ppi)

## ----three breaks, results="hide", eval=FALSE----------------------------
#  # Draw with less breaks
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             num_breaks=3, main=plot_title_ppi)
#  

## ----test of xina network plotting, eval=FALSE---------------------------
#  # Draw without labels
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.size=10, vertex_label_flag=FALSE)

## ----layout test, eval=FALSE---------------------------------------------
#  # Sphere layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "sphere")
#  # Star layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "star")
#  
#  # Gem layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "gem")
#  
#  # Tree layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "tree")
#  
#  # Circle layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "circle")
#  
#  # Random layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "random")
#  
#  # Nicely layout
#  comigrations_condition_biased_clusters <- xina_plot_single(xina_result, protein_list, centrality_type="Eigenvector",
#                                                             vertex.label.cex=0.5, vertex.size=8, vertex.label.dist=1,
#                                                             main=plot_title_ppi, layout_specified = "nicely")
#  

## ----enrichment test of the comigrated proteins--------------------------
# Enrichment test using KEGG pathway terms
kegg_enriched <- xina_enrichment(string_db, protein_list, enrichment_type = "KEGG", pval_threshold=0.1)
head(kegg_enriched$KEGG)
# plot enrichment test results
plot_enrichment_results(kegg_enriched$KEGG, num_terms=20)

## ----enrichment test of the comigrated proteins2, eval=FALSE-------------
#  # Enrichment test using GO pathway terms
#  go_enriched <- xina_enrichment(string_db, protein_list, enrichment_type = "GO", pval_threshold=0.1)
#  head(go_enriched$Process)
#  head(go_enriched$Function)
#  head(go_enriched$Component)

## ----draw GO enrichment results, eval=FALSE------------------------------
#  plot_enrichment_results(go_enriched$Process, num_terms=20)
#  plot_enrichment_results(go_enriched$Function, num_terms=20)
#  plot_enrichment_results(go_enriched$Component, num_terms=20)