# CPSM vignette code (knitr-purled chunks).
# Walks through the CPSM example workflow: data processing, train/test split,
# normalization, feature selection (LASSO PI score, univariate Cox),
# MTLR model building, plotting, risk-group prediction and nomogram generation.
# Each "## ----" line below is a knitr chunk header and is kept verbatim.

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----warning=FALSE, message=FALSE, eval=FALSE---------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("CPSM")

## ----warning=FALSE, message=FALSE---------------------------------------------
library(CPSM)
library(SummarizedExperiment)
set.seed(7) # set seed
# Load data (from the package)
data(Example_TCGA_LGG_FPKM_data, package = "CPSM")
# View data
str(Example_TCGA_LGG_FPKM_data[1:10])

## ----warning=FALSE, message=FALSE---------------------------------------------
# Step1 - Specify the file path to your data
# file_path <- "path/to/your/TCGA-LGG_FPKM_data_with_clin_data.txt"
# Step2 - load/read data
# data <- read.table(file = file_path, header = TRUE, sep = "\t",
#                    stringsAsFactors = FALSE, check.names = FALSE)
# Step3 - View/Inspect the first few rows
# head(data[1:30])

## -----------------------------------------------------------------------------
data(Example_TCGA_LGG_FPKM_data, package = "CPSM")
# Build one table: colData (minus its last column) bound to the transposed
# "expression" assay.
combined_df <- cbind(
  as.data.frame(colData(Example_TCGA_LGG_FPKM_data))[
    , -ncol(colData(Example_TCGA_LGG_FPKM_data))
  ],
  t(as.data.frame(assay(
    Example_TCGA_LGG_FPKM_data,
    "expression"
  )))
)
# View the structure of the first 30 elements of the example object.
# str() prints by itself and returns NULL, so it is not wrapped in print().
str(Example_TCGA_LGG_FPKM_data[1:30])
#------------------------ OUTPUTS ---------------------#
# Access the output of function
New_data <- data_process_f(combined_df, col_num = 20, surv_time = "OS.time")
# View/Inspect the structure of the output data after data pre-processing
str(New_data[1:10])

## -----------------------------------------------------------------------------
# Load data
data(New_data, package = "CPSM")
# View top 3 rows and first 30 columns of data
print(head(New_data[1:30], 3))
# Call/Run the function for "New_data"
result <- tr_test_f(data = New_data, fraction = 0.9)
#------------------------ OUTPUTS ---------------------#
# Access output from the function: train and test data
train_FPKM <- result$train_data
# View top 2 rows and first 30 columns of data
print(head(train_FPKM[1:30], 2))
# Access output - test data
test_FPKM <- result$test_data
# View top 3 rows and first 30 columns of data
print(head(test_FPKM[1:30], 3))

## -----------------------------------------------------------------------------
# Step 3 - Data Normalization
# Load train and test data from package
data(train_FPKM, package = "CPSM")
# View top 3 rows and first 30 columns of data
print(head(train_FPKM[1:30], 3))
data(test_FPKM, package = "CPSM")
# View top 3 rows and first 30 columns of data
print(head(test_FPKM[1:30], 3))
# Call function to normalize the training and test data sets
Result_N_data <- train_test_normalization_f(
  train_data = train_FPKM,
  test_data = test_FPKM,
  col_num = 21
)
#------------------------ OUTPUTS ---------------------#
# Access the normalized train and test data
Train_Clin <- Result_N_data$Train_Clin
Test_Clin <- Result_N_data$Test_Clin
Train_Norm_data <- Result_N_data$Train_Norm_data
Test_Norm_data <- Result_N_data$Test_Norm_data
# View output - train clinical data
str(Train_Clin[1:10])
# View output - train normalized data
str(Train_Norm_data[1:10])
# View output - test clinical data
str(Test_Clin[1:10])
# View output - test normalized data
str(Test_Norm_data[1:10])

## ----warning=FALSE, message=FALSE, fig.width=7, fig.height=4------------------
# Step 4 - Lasso PI Score
# Load data - normalized train data
data(Train_Norm_data, package = "CPSM")
# View structure of the first 30 columns of data
str(Train_Norm_data[1:30])
# Load data - normalized test data
data(Test_Norm_data, package = "CPSM")
# View structure of the first 30 columns of data
str(Test_Norm_data[1:30])
# Call/Run function to select features and generate PI score using
# LASSO regression
Result_PI <- Lasso_PI_scores_f(
  train_data = Train_Norm_data,
  test_data = Test_Norm_data,
  nfolds = 5,
  col_num = 21,
  surv_time = "OS_month",
  surv_event = "OS"
)
#------------------------ OUTPUTS ---------------------#
# Access output - selected features (with beta coefficient value) by LASSO
Train_Lasso_key_variables <- Result_PI$Train_Lasso_key_variables
# View top features (from selected set) with beta coeff values
print(head(Train_Lasso_key_variables))
# Access output - Train set with PI values
Train_PI_data <- Result_PI$Train_PI_data
# View Train set with PI values
str(Train_PI_data[1:10])
# Access output - Test set with PI values
Test_PI_data <- Result_PI$Test_PI_data
# View Test set with PI values
str(Test_PI_data[1:10])
# View the LASSO regression lambda plot
plot(Result_PI$cvfit)

## ----warning=FALSE, message=FALSE---------------------------------------------
# Step 4b Univariate Survival Significant Feature Selection.
# Load normalized train data
data(Train_Norm_data, package = "CPSM")
# View top 3 rows and first 30 columns of data
print(head(Train_Norm_data[1:30], 3))
# Load normalized test data
data(Test_Norm_data, package = "CPSM")
# View top 3 rows and first 30 columns of data
print(head(Test_Norm_data[1:30], 3))
# Call/Run function to select features using Univariate COX method
Result_Uni <- Univariate_sig_features_f(
  train_data = Train_Norm_data,
  test_data = Test_Norm_data,
  col_num = 21,
  surv_time = "OS_month",
  surv_event = "OS"
)
#------------------------ OUTPUTS ---------------------#
# Access output - A table of univariate significant genes
Univariate_Suv_Sig_G_L <- Result_Uni$Univariate_Survival_Significant_genes_List
# View top features (from selected set) using Univariate Cox regression
print(head(Univariate_Suv_Sig_G_L))
# Access output - Train data with only sig features
Train_Uni_sig_data <- Result_Uni$Train_Uni_sig_data
# View Train data with only sig features
str(Train_Uni_sig_data[1:10])
# Access output - Test data with only sig features
Test_Uni_sig_data <- Result_Uni$Test_Uni_sig_data
# View Test data with only sig features
str(Test_Uni_sig_data[1:10])
# Access output - table of univariate significant clinical features
Uni_Sur_Sig_clin_List <- Result_Uni$Univariate_Survival_Significant_clin_List
# View a table of univariate significant clinical features
print(head(Uni_Sur_Sig_clin_List))
# Access output - ZPH test results
ZPH_test <- Result_Uni$ZPH_Genes
# View ZPH (diagnostics test) results
# str(ZPH_test[1:10])
head(ZPH_test[1:10])

## ----warning=FALSE, message=FALSE, error = TRUE-------------------------------
try({
  # Load data for model building/development
  # Load training data
  data(Train_Clin, package = "CPSM")
  # View structure of data
  str(Train_Clin)
  # Load test data
  data(Test_Clin, package = "CPSM")
  # View structure of data
  str(Test_Clin)
  # Load a list of selected features
  data(Key_Clin_feature_list, package = "CPSM")
  print(Key_Clin_feature_list)
  # Call/Run function to develop MTLR model
  Result_Model_Type1 <- MTLR_pred_model_f(
    train_clin_data = Train_Clin,
    test_clin_data = Test_Clin,
    Model_type = 1,
    train_features_data = Train_Clin,
    test_features_data = Test_Clin,
    Clin_Feature_List = Key_Clin_feature_list,
    surv_time = "OS_month",
    surv_event = "OS",
    nfolds = 5
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Predicted survival probability across different time points
  survCurves_data <- Result_Model_Type1$survCurves_data
  # View predicted survival probability for test samples
  str(survCurves_data)
  # Access output - Predicted mean and median survival of test data
  mean_median_survival_tim_d <-
    Result_Model_Type1$mean_median_survival_time_data
  # View predicted mean and median survival time
  str(mean_median_survival_tim_d)
  # Access output - all results for test data
  survival_result_based_on_MTLR <-
    Result_Model_Type1$survival_result_based_on_MTLR
  # Access output - Final evaluation parameters of results
  Error_mat_for_Model <- Result_Model_Type1$Error_mat_for_Model
  str(Error_mat_for_Model)
  # View evaluation parameters of the model
  head(Error_mat_for_Model)
})

## ----warning=FALSE, message=FALSE, error = TRUE-------------------------------
try({
  # Load training data with clinical features
  data(Train_Clin, package = "CPSM")
  # View structure of data
  str(Train_Clin)
  # Load test data with clinical features
  data(Test_Clin, package = "CPSM")
  # View structure of data (the test set that was just loaded)
  str(Test_Clin)
  # Load training data with PI score
  data(Train_PI_data, package = "CPSM")
  # View top 3 rows of data
  print(head(Train_PI_data, 3))
  # Load test data with PI score
  data(Test_PI_data, package = "CPSM")
  # View top 3 rows of data
  print(head(Test_PI_data, 3))
  # Load a list of features (PI)
  data(Key_PI_list, package = "CPSM")
  # View list of features to build model
  str(Key_PI_list)
  # Call/Run function to develop MTLR prediction model based on PI score
  Result_Model_Type2 <- MTLR_pred_model_f(
    train_clin_data = Train_Clin,
    test_clin_data = Test_Clin,
    Model_type = 2,
    train_features_data = Train_PI_data,
    test_features_data = Test_PI_data,
    Clin_Feature_List = Key_PI_list,
    surv_time = "OS_month",
    surv_event = "OS",
    nfolds = 5
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Predicted survival probability across different time points
  survCurves_data <- Result_Model_Type2$survCurves_data
  # View predicted survival probability for test samples
  str(survCurves_data)
  # Access output - Predicted mean and median survival of test data
  mean_median_surviv_tim_da <-
    Result_Model_Type2$mean_median_survival_time_data
  # View predicted mean and median survival of test data
  str(mean_median_surviv_tim_da)
  # Access output - all results for test data
  survival_result_b_on_MTLR <-
    Result_Model_Type2$survival_result_based_on_MTLR
  # Access output - Final evaluation parameters of results
  Error_mat_for_Model <- Result_Model_Type2$Error_mat_for_Model
  # View structure of evaluation parameters of the model
  str(Error_mat_for_Model)
  # View evaluation parameters of the model
  head(Error_mat_for_Model)
})

## ----warning=FALSE, message=FALSE, error = TRUE-------------------------------
try({
  # Load training data with clinical features
  data(Train_Clin, package = "CPSM")
  # View structure of data
  str(Train_Clin)
  # Load test data with clinical features
  data(Test_Clin, package = "CPSM")
  # View structure of data
  str(Test_Clin)
  # Load training data with PI score value
  data(Train_PI_data, package = "CPSM")
  # View top 3 rows of data
  print(head(Train_PI_data, 3))
  # Load test data with PI score value
  data(Test_PI_data, package = "CPSM")
  # View top 3 rows of data
  print(head(Test_PI_data, 3))
  # Load a list of features containing clinical features along with PI
  data(Key_Clin_features_with_PI_list, package = "CPSM")
  # View top entries of the feature list on which the model will be built
  print(head(Key_Clin_features_with_PI_list))
  # Call/Run function to develop MTLR prediction model based on PI score
  # with clinical features
  Result_Model_Type3 <- MTLR_pred_model_f(
    train_clin_data = Train_Clin,
    test_clin_data = Test_Clin,
    Model_type = 3,
    train_features_data = Train_PI_data,
    test_features_data = Test_PI_data,
    Clin_Feature_List = Key_Clin_features_with_PI_list,
    surv_time = "OS_month",
    surv_event = "OS",
    nfolds = 5
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Predicted survival probability across different time points
  survCurves_data <- Result_Model_Type3$survCurves_data
  # View predicted survival probability for test samples
  str(survCurves_data)
  # Access output - Predicted mean and median survival of test data
  mean_median_surv_tim_da <-
    Result_Model_Type3$mean_median_survival_time_data
  # View predicted mean and median survival of test data
  str(mean_median_surv_tim_da)
  # Access output - all results for test data
  survival_result_b_on_MTLR <-
    Result_Model_Type3$survival_result_based_on_MTLR
  # Access output - Final evaluation parameters of results
  Error_mat_for_Model <- Result_Model_Type3$Error_mat_for_Model
  # View structure of evaluation parameters of the model
  str(Error_mat_for_Model)
  # View evaluation parameters of the model
  head(Error_mat_for_Model)
})

## ----warning=FALSE, message=FALSE, error = TRUE-------------------------------
try({
  # Load training data with clinical features
  data(Train_Clin, package = "CPSM")
  # View top 3 rows of data
  print(head(Train_Clin, 3))
  # Load test data with clinical features
  data(Test_Clin, package = "CPSM")
  # View top 3 rows of data
  print(head(Test_Clin, 3))
  # Load normalized training data
  data(Train_Uni_sig_data, package = "CPSM")
  # View top 3 rows of data with first 10 columns
  print(head(Train_Uni_sig_data[1:10], 3))
  # Load normalized test data
  data(Test_Uni_sig_data, package = "CPSM")
  # View top 3 rows of data (first 10 columns)
  print(head(Test_Uni_sig_data[1:10], 3))
  # Load a list of clinical features along with top features from univariate
  # survival analysis
  data(Key_univariate_features_with_Clin_list, package = "CPSM")
  # View list of features on which the model will be built
  print(head(Key_univariate_features_with_Clin_list))
  # Call/Run function to develop MTLR model based on clinical and selected
  # univariate features.
  # NOTE(review): the result is named "Type5" but Model_type = 4 is passed;
  # confirm against the MTLR_pred_model_f documentation which is intended.
  Result_Model_Type5 <- MTLR_pred_model_f(
    train_clin_data = Train_Clin,
    test_clin_data = Test_Clin,
    Model_type = 4,
    train_features_data = Train_Uni_sig_data,
    test_features_data = Test_Uni_sig_data,
    Clin_Feature_List = Key_univariate_features_with_Clin_list,
    surv_time = "OS_month",
    surv_event = "OS",
    nfolds = 5
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Predicted survival probability across different time points
  survCurves_data <- Result_Model_Type5$survCurves_data
  # View predicted survival probability for test samples
  str(survCurves_data)
  # Access output - Predicted mean and median survival of test data
  mean_median_surv_tim_da <-
    Result_Model_Type5$mean_median_survival_time_data
  # View predicted mean and median survival of test data
  str(mean_median_surv_tim_da)
  # Access output - all results for test data
  survival_result_b_on_MTLR <-
    Result_Model_Type5$survival_result_based_on_MTLR
  # Access output - Final evaluation parameters of results
  Error_mat_for_Model <- Result_Model_Type5$Error_mat_for_Model
  # View structure of evaluation parameters of the model
  str(Error_mat_for_Model)
  # View evaluation parameters of the model
  head(Error_mat_for_Model)
})

## ----warning=FALSE, message=FALSE, error = TRUE , fig.width=7, fig.height=4----
try({
  # Create survival curves/plots for individual patients
  # Load predicted survival probability (at different time points) data of
  # test samples to draw the survival curve plot
  data(survCurves_data, package = "CPSM")
  # View top 3 rows of data
  print(head(survCurves_data, 3))
  # Call/Run function to draw the survival curve plot
  plots <- surv_curve_plots_f(
    Surv_curve_data = survCurves_data,
    selected_sample = "TCGA-TQ-A7RQ-01",
    font_size = 12,
    line_size = 0.5,
    all_line_col = "grey70",
    highlight_col = "red"
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - print survival plot with all patients in test data
  print(plots$all_patients_plot)
  # Access output - print survival plot highlighting the survival curve of
  # the selected patient
  print(plots$highlighted_patient_plot)
})

## ----warning=FALSE, message=FALSE, error = TRUE , fig.width=7, fig.height=4----
try({
  # Load data of predicted mean/median survival time for test samples
  data(mean_median_survival_time_data, package = "CPSM")
  # View top 3 rows of data
  print(head(mean_median_survival_time_data, 3))
  custom_colors <- c(
    "TRUE.Mean" = "cyan",
    "TRUE.Median" = "pink",
    "FALSE.Mean" = "gray90",
    "FALSE.Median" = "gray70"
  )
  # Call/Run function to draw the barplot
  plots_2 <- mean_median_surv_barplot_f(
    surv_mean_med_data = mean_median_survival_time_data,
    selected_sample = "TCGA-TQ-A7RQ-01",
    font_size = 8,
    font_color = "black",
    bar_colors = custom_colors
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Print barplots representing predicted mean/median
  # survival time of patients
  print(plots_2$mean_med_all_pat)
  # Access output - Print barplots representing predicted mean/median
  # survival time of patients, highlighting the selected patient
  print(plots_2$highlighted_selected_pat)
})

## ----warning=FALSE, message=FALSE, error=TRUE---------------------------------
try({
  # Load example data from CPSM package
  # Load training data with selected features (e.g. PI values)
  data(Train_PI_data, package = "CPSM")
  # View top 3 rows of data
  print(head(Train_PI_data, 3))
  # Load test data with selected features (e.g. PI values)
  data(Test_PI_data, package = "CPSM")
  # View top 3 rows of data (the test set that was just loaded)
  print(head(Test_PI_data, 3))
  # Load feature list
  data(Key_PI_list, package = "CPSM")
  # View feature list
  str(Key_PI_list)
  # Call/Run function to predict survival-based risk groups for test samples
  Results_Risk_group_Prediction <- predict_survival_risk_group_f(
    selected_train_data = Train_PI_data,
    selected_test_data = Test_PI_data,
    Feature_List = Key_PI_list
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - Performance of the best model on training and test data
  Best_model_Prediction_results <-
    Results_Risk_group_Prediction$misclassification_results
  # View performance of the best model on training and test data
  print(head(Best_model_Prediction_results))
  # Access output - Prediction results of the best model on test set
  Test_results <- Results_Risk_group_Prediction$Test_results
  # View prediction results on test data
  print(head(Test_results))
})

## ----fig.height=6, fig.width=8, warning=FALSE, message=FALSE------------------
# Load example data
# Load predicted risk-group results for training samples
data(Train_results, package = "CPSM")
# View top 3 rows of data
print(head(Train_results, 3))
# Load predicted risk-group results for test samples
data(Test_results, package = "CPSM")
# View top 3 rows of data
print(head(Test_results, 3))
# Load predicted survival probability data (at multiple time points) for
# test samples
data(survCurves_data, package = "CPSM")
# View top 3 rows of data
print(head(survCurves_data, 3))
# Select a test sample to visualize
sample_id <- "TCGA-TQ-A7RQ-01"
# Generate KM overlay plot
KM_plot <- km_overlay_plot_f(
  Train_results = Train_results,
  Test_results = Test_results,
  survcurve_te_data = survCurves_data,
  selected_sample = sample_id,
  font_size = 12, # font size
  train_palette = c("firebrick", "blue"), # custom risk group colors
  test_curve_col = "darkgreen", # highlight test sample in darkgreen
  test_curve_size = 1.2, # thicker test sample curve
  test_curve_lty = "dotdash", # dashed-dot test curve
  annotation_col = "black" # annotation text in black
)
#------------------------ OUTPUTS ---------------------#
# View KM plot comparing the survival curve of the selected test sample with
# the survival curves of the risk groups of the training data
KM_plot

## ----warning=FALSE, message=FALSE, error = TRUE, fig.width=7, fig.height=6----
try({
  # Load normalized training data containing the selected features (on which
  # the user wants to develop the nomogram) along with survival info
  data(Train_Data_Nomogram_input, package = "CPSM")
  # View top 3 rows and first 30 columns of data
  print(head(Train_Data_Nomogram_input[1:30], 3))
  # Load a list of selected features (on which the user wants to develop the
  # nomogram)
  data(feature_list_for_Nomogram, package = "CPSM")
  # View feature list to build nomogram
  str(feature_list_for_Nomogram)
  # Call/Run function to generate nomogram
  Result_Nomogram <- Nomogram_generate_f(
    data = Train_Data_Nomogram_input,
    Feature_List = feature_list_for_Nomogram,
    surv_time = "OS_month",
    surv_event = "OS",
    font_size = 0.8,
    axis_cex = 0.5,
    tcl_len = 0.5,
    label_margin = 0.5,
    col_grid = gray(c(0.85, 0.95))
  )
  #------------------------ OUTPUTS ---------------------#
  # Access output - C-index value and nomogram plot
  C_index_mat <- Result_Nomogram$C_index_mat
  # Display C-index
  print(C_index_mat)
})

## -----------------------------------------------------------------------------
sessionInfo()