5 Figure 4

Figure 4: Metabolic information yields an independent prognostic clustering classification for BCP-ALL and T-ALL.

A. The schematic workflow following SNF integration analysis. The BMMC omics data were inputted to generate a fused similarity network, which was subsequently utilized for sample clustering in this study.

B. The discrimination of three BCP-ALL clusters (BC1, BC2 and BC3) generated from SNF method.

C. The association of the three metabolic clusters with clinical OS in 75 BCP-ALL patients.

D. The multivariate Cox analysis reveals that the metabolic cluster bore an independent prognostic value for OS and EFS, respectively. The WBC was stratified by 30×109/L. In the genetic-based risk stratification, BCR::ABL1, ETV6::RUNX1, DUX4::IGH and TCF3::PBX1 were categorized into standard risk subgroup, otherwise were into poor risk subgroup.

E. A core metabolism-based subnetwork that best explains the difference between BC3 and BC1/2. In this network, each circular node represents a metabolite, labeled with its KEGG ID, and each square node represents a gene. Node colors indicate logFC values: red and green signify different gene expression levels, while yellow and blue signify varying metabolite levels. The highlighted labels correspond to pathways in the cholesterol metabolism and nicotinate and nicotinamide metabolism.

F. The discrimination of two T-ALL clusters (TC1 and TC2) based on SNF method.

G. The association of the two metabolic clusters with clinical OS outcomes in 24 T-ALL patients. The result suggested TC1 was of significantly poorer prognosis than TC2.

H. A core metabolism-based subnetwork that best explains the difference between TC2 and TC1. The highlighted labels correspond to pathways in the TCA and OXPHOS. PC refers to a group of phosphatidylcholines without specific KEGG IDs.

5.1 (A) Schematic Workflow

The schematic workflow following integrated analysis. The BMMC omics data are inputted to generate a fused similarity network, which is subsequently utilized for sample clustering in this study.

library(dplyr)
library(MNet)
library(survival)
library(survminer)

#-------------------------------------------------------------------------------
#  Step 1: Cox regression of metabolomics of BCP-ALL
#-------------------------------------------------------------------------------
sample_info <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(!is.na(METc_BM_D0_ID)) %>%
  dplyr::filter(Lineage=="B") %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::select(bianhao,METc_BM_D0_ID)

dat_cell <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("label") %>%
  dplyr::select(all_of(sample_info$METc_BM_D0_ID)) %>%
  log1p()
names(dat_cell) <- sample_info$bianhao

clinical <- readxl::read_excel("raw_data/08 队列病人生存资料-v231026.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(bianhao,OS_stat,OS) %>%
  dplyr::inner_join(sample_info,by=c("bianhao")) %>%
  dplyr::arrange(match(bianhao,sample_info$bianhao)) %>%
  dplyr::select(-METc_BM_D0_ID) %>%
  unique()

dat_t <- t(dat_cell) %>%
  data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
metabolite_name <- names(dat_t)[3:ncol(dat_t)]

dat_t_raw <- t(dat_cell) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
metabolite_name_raw <- names(dat_t_raw)[3:ncol(dat_t_raw)]
dd <- data.frame(name=metabolite_name,name_raw=metabolite_name_raw)

univ_formulas <- lapply(metabolite_name,function(x) stats::as.formula(paste('survival::Surv(time, status)~', x)))

univ_models <- lapply( univ_formulas, function(x){survival::coxph(x, data = dat_t)})

univ_results <- lapply(univ_models,
                       function(x){
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (",
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         name <- rownames(x$coefficients)
                         res<-c(name,beta, HR, wald.test, p.value)
                         names(res)<-c("name","beta", "HR (95% CI for HR)", "wald.test","p.value")
                         return(res)
                       })
res <- t(as.data.frame(univ_results, check.names = FALSE))
result <- as.data.frame(res)

metabolite_all <- result %>%
  dplyr::left_join(dd,by=c("name"))

write.table(metabolite_all,"result/Figure4/4A_1.cox_metabolite_all_B.txt",quote=F,row.names=F,sep="\t")

#-------------------------------------------------------------------------------
#  Step 2: Cox regression of  transcriptomics of BCP-ALL
#-------------------------------------------------------------------------------
gene_metabolite1 <- gene_metabolite %>% dplyr::filter(dest_type=="gene")
gene2 <- PathwayExtendData %>% filter(type=="gene")
gene_filter <- unique(c(gene_metabolite1$gene,gene2$name))
length(gene_filter)

sample_rna <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(Lineage=="B") %>% 
  dplyr::filter(!is.na(RNA_ID)) %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::select(bianhao,RNA_id_raw)

dat_gene <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(sample_rna$RNA_id_raw) %>%
  tibble::rownames_to_column(var="label") %>%
  dplyr::filter(label %in% gene_filter) %>%
  tibble::column_to_rownames("label")

names(dat_gene) <- sample_rna$bianhao

dat_gene_0 <- rowSums(dat_gene==0)
dat_gene <- dat_gene %>%
  dplyr::mutate(num=dat_gene_0) %>%
  dplyr::filter(num < 0.5*ncol(dat_gene)) %>%
  dplyr::select(-num)

clinical <- readxl::read_excel("raw_data/08 队列病人生存资料-v231026.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(bianhao_new,OS_stat,OS) %>%
  dplyr::mutate(bianhao=paste0("A",bianhao_new)) %>%
  dplyr::select(-bianhao_new) %>%
  unique()

dat_t <- t(dat_gene) %>%
  data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
gene_name <- names(dat_t)[3:ncol(dat_t)]

dat_t_raw <- t(dat_gene) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
gene_name_raw <- names(dat_t_raw)[3:ncol(dat_t_raw)]
dd <- data.frame(name=gene_name,name_raw=gene_name_raw)

univ_formulas <- lapply(gene_name,function(x) stats::as.formula(paste('survival::Surv(time, status)~', x)))

univ_models <- lapply(univ_formulas, function(x){survival::coxph(x, data = dat_t)})

univ_results <- lapply(univ_models,
                       function(x){
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (",
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         name <- rownames(x$coefficients)
                         res<-c(name,beta, HR, wald.test, p.value)
                         names(res)<-c("name","beta", "HR (95% CI for HR)", "wald.test","p.value")
                         return(res)
                       })
res <- t(as.data.frame(univ_results, check.names = FALSE))
result <- as.data.frame(res)

gene_all <- result %>%
  dplyr::left_join(dd,by="name")

write.table(gene_all,"result/Figure4/4A_1.cox_gene_all_B.txt",quote=F,row.names=F,sep="\t")

#------------------------------------------------------------------------------
# Step 3: Cox regression of metabolomics of T-ALL
#------------------------------------------------------------------------------
sample_info <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(!is.na(METc_BM_D0_ID)) %>%
  dplyr::filter(Lineage=="T") %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::select(bianhao,METc_BM_D0_ID)

dat_cell <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("label") %>%
  dplyr::select(all_of(sample_info$METc_BM_D0_ID)) %>%
  log1p()
names(dat_cell) <- sample_info$bianhao

clinical <- readxl::read_excel("raw_data/08 队列病人生存资料-v231026.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(bianhao,OS_stat,OS) %>%
  dplyr::inner_join(sample_info,by=c("bianhao")) %>%
  dplyr::arrange(match(bianhao,sample_info$bianhao)) %>%
  dplyr::select(-METc_BM_D0_ID) %>%
  unique()

dat_t <- t(dat_cell) %>%
  data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
metabolite_name <- names(dat_t)[3:ncol(dat_t)]

dat_t_raw <- t(dat_cell) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
metabolite_name_raw <- names(dat_t_raw)[3:ncol(dat_t_raw)]
dd <- data.frame(name=metabolite_name,name_raw=metabolite_name_raw)

univ_formulas <- lapply(metabolite_name,function(x) stats::as.formula(paste('survival::Surv(time, status)~', x)))

univ_models <- lapply( univ_formulas, function(x){survival::coxph(x, data = dat_t)})

univ_results <- lapply(univ_models,
                       function(x){
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);
                         HR <-signif(x$coef[2], digits=2);
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (",
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         name <- rownames(x$coefficients)
                         res<-c(name,beta, HR, wald.test, p.value)
                         names(res)<-c("name","beta", "HR (95% CI for HR)", "wald.test","p.value")
                         return(res)
                       })
res <- t(as.data.frame(univ_results, check.names = FALSE))
result <- as.data.frame(res)

metabolite_all <- result %>%
  dplyr::left_join(dd,by=c("name"))

write.table(metabolite_all,"result/Figure4/4A_1.cox_metabolite_all_T.txt",quote=F,row.names=F,sep="\t")

#-------------------------------------------------------------------------------
#  Step 4: Cox regression of  transcriptomics of T-ALL
#-------------------------------------------------------------------------------
gene_metabolite1 <- gene_metabolite %>% dplyr::filter(dest_type=="gene")
gene2 <- PathwayExtendData %>% filter(type=="gene")
gene_filter <- unique(c(gene_metabolite1$gene,gene2$name))
length(gene_filter)

sample_rna <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(Lineage=="T") %>% 
  dplyr::filter(!is.na(RNA_ID)) %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::select(bianhao,RNA_id_raw)

dat_gene <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(sample_rna$RNA_id_raw) %>%
  tibble::rownames_to_column(var="label") %>%
  dplyr::filter(label %in% gene_filter) %>%
  tibble::column_to_rownames("label")

names(dat_gene) <- sample_rna$bianhao

dat_gene_0 <- rowSums(dat_gene==0)
dat_gene <- dat_gene %>%
  dplyr::mutate(num=dat_gene_0) %>%
  dplyr::filter(num < 0.5*ncol(dat_gene)) %>%
  dplyr::select(-num)

clinical <- readxl::read_excel("raw_data/08 队列病人生存资料-v231026.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(bianhao_new,OS_stat,OS) %>%
  dplyr::mutate(bianhao=paste0("A",bianhao_new)) %>%
  dplyr::select(-bianhao_new) %>%
  unique()

dat_t <- t(dat_gene) %>%
  data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
gene_name <- names(dat_t)[3:ncol(dat_t)]

dat_t_raw <- t(dat_gene) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var="bianhao") %>%
  dplyr::left_join(clinical,by="bianhao") %>%
  dplyr::select(OS,OS_stat,everything()) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::rename("time"="OS") %>%
  dplyr::rename("status"="OS_stat")
gene_name_raw <- names(dat_t_raw)[3:ncol(dat_t_raw)]
dd <- data.frame(name=gene_name,name_raw=gene_name_raw)

univ_formulas <- lapply(gene_name,function(x) stats::as.formula(paste('survival::Surv(time, status)~', x)))

univ_models <- lapply( univ_formulas, function(x){survival::coxph(x, data = dat_t)})

univ_results <- lapply(univ_models,
                       function(x){
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (",
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         name <- rownames(x$coefficients)
                         res<-c(name,beta, HR, wald.test, p.value)
                         names(res)<-c("name","beta", "HR (95% CI for HR)", "wald.test",
                                       "p.value")
                         return(res)
                       })
res <- t(as.data.frame(univ_results, check.names = FALSE))
result <- as.data.frame(res)

gene_all <- result %>%
  dplyr::left_join(dd,by="name")

write.table(gene_all,"result/Figure4/4A_1.cox_gene_all_T.txt",quote=F,row.names=F,sep="\t")

5.2 (B) Discrimination of BCP-ALL Clusters

The discrimination of three clusters (BC1, BC2 and BC3) generated from SNF method.

library(dplyr)
library(MNet)
library(survival)
library(survminer)

#-------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#-------------------------------------------------------------------------------
dat_filter <- data.table::fread("result/Figure2/1.BMMCvsHC.txt") %>%
  as.data.frame() %>%
  dplyr::arrange(P.Value) %>%
  dplyr::filter(P.Value < 0.5)
dat_filter$P.Value[1:5]

dat_cox <- data.table::fread("result/Figure4/4A_1.cox_metabolite_all_B.txt") %>%
 as.data.frame() %>%
  dplyr::arrange(p.value) %>%
  head(n=120)
dat_cox$p.value[1:5]
dat_filter <- dat_filter %>%
  dplyr::filter(name %in% dat_cox$name_raw)

sample_info <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(!is.na(METc_BM_D0_ID)) %>%
  dplyr::filter(!is.na(RNA_ID)) %>%
  dplyr::filter(Lineage=="B")

dat_cell <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  as.data.frame() %>%
  dplyr::filter(label %in% dat_filter$name) %>%
  tibble::column_to_rownames("label") %>%
  dplyr::select(all_of(sample_info$METc_BM_D0_ID)) %>%
  log1p()

names(dat_cell) <- sample_info$Pid

cox_gene <- data.table::fread("result/Figure4/4A_1.cox_gene_all_B.txt") %>%
  as.data.frame() %>%
  dplyr::arrange(p.value) %>%
  head(n=600)

dat_gene <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(sample_info$RNA_id_raw) %>%
  tibble::rownames_to_column(var="label") %>%
  dplyr::filter(label %in% cox_gene$name_raw) %>%
  tibble::column_to_rownames("label")

names(dat_gene) <- sample_info$Pid

#-------------------------------------------------------------------------------
#  Step 2: SNF of discrimination BCP-ALL clusters
#-------------------------------------------------------------------------------
library(SNFtool)
metabolite_norm = standardNormalization(t(dat_cell))
metgene_norm = standardNormalization(t(dat_gene))

dim(metabolite_norm)
dim(metgene_norm)

write.table(metabolite_norm,"result/Figure4/4B_metabolite_norm_B.txt",quote=F,sep="\t")
write.table(metgene_norm,"result/Figure4/4B_metgene_norm_B.txt",quote=F,sep="\t")

Dist1 = (SNFtool::dist2(as.matrix(metabolite_norm),as.matrix(metabolite_norm)))^(1/2)
Dist2 = (SNFtool::dist2(as.matrix(metgene_norm),as.matrix(metgene_norm)))^(1/2)
W1 = affinityMatrix(Dist1, 10, 0.5)
W2 = affinityMatrix(Dist2, 10, 0.5)
W = SNF(list(W1,W2), 10, 10)

Clusters = spectralClustering(W,K=3)
M_label=cbind(Clusters,sample_info) %>%
  dplyr::mutate(Clusters=ifelse(Clusters==1,"BC3",
                         ifelse(Clusters==2,"BC2",
                         ifelse(Clusters==3,"BC1",Clusters))))

write.table(M_label,paste0("result/Figure4/4B_cluster_B.txt"),quote=F,row.names = F,sep="\t")

library(ComplexHeatmap)
l <- M_label %>%
  dplyr::arrange(Clusters)

W_2 <- W %>%
  as.data.frame() %>%
  dplyr::select(l$Pid) %>%
  tibble::rownames_to_column(var="sample") %>%
  dplyr::arrange(match(sample,l$Pid)) %>%
  tibble::column_to_rownames("sample")
W_2[W_2==0.5] <- 0

col_fun = circlize::colorRamp2(c( 0, 0.03), c( "white","#F48326"))

p_metabolite <- Heatmap(W_2,height=unit(10,"cm"),name="Sample Data",
                        cluster_rows = F,cluster_columns = F,
                        col = col_fun,
                        show_column_names = F,show_row_names = F,
                        row_names_gp = gpar(fontsize = 3),
                        column_names_gp = gpar(fontsize = 3))

pdf("result/Figure4/4B_SNF_discrimination_complexheatmap_B.pdf",width=5,height = 5)
p_metabolite
dev.off()

5.3 (C) OS in 75 BCP-ALL patients

The association of the three metabolic clusters with clinical OS in 75 BCP-ALL patients.

library(dplyr)
library(MNet)
library(survival)
library(survminer)

#-------------------------------------------------------------------------------
#  Step 1: Load data
#-------------------------------------------------------------------------------

##### new survival #####
survival_new <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::select(PID,OS_stat,`OS (days)`) %>%
  dplyr::rename("OS"="OS (days)")

clinical_survival <- data.table::fread(paste0("result/Figure4/4B_cluster_B.txt")) %>%
  as.data.frame() %>%
  dplyr::select(-OS_stat,-OS,-EFS_stat,-EFS,-RFS_stat,-RFS) %>%
  left_join(survival_new,by=c("Pid"="PID"))

#-------------------------------------------------------------------------------
#  Step 2: Overall survival analysis
#-------------------------------------------------------------------------------
surv_object <- Surv(time = clinical_survival$OS/30, event = clinical_survival$OS_stat)
fit_cluster <- survfit(surv_object ~ Clusters, data = clinical_survival)

p1 <- survminer::ggsurvplot(fit_cluster, data = clinical_survival, 
                            title="OS",
                            palette=c("#9BD53F","#A6CEE3","#EA644C"),
                            tables.theme = theme_void(),risk.table = TRUE,
                            risk.table.fontsize=6,
                            break.time.by = 12,
                            risk.table.y.text = FALSE, 
                            tables.height = 0.15,risk.table.col = "black",
                            #pval = TRUE,pval.method = F,
                            #pval.coord=c(0.5,0.1),pval.size=6,
                            font.tickslab=c(14,"plain","black"),
                            xlab="Time (Months) ",ylab="OS Survival")

pdf(paste0("result/Figure4/4C_OS_B.pdf"),onefile=FALSE)
p1
dev.off()

5.4 (D) Multivariate Cox Analysis

The multivariate Cox analysis reveals that the metabolic cluster bore an independent prognostic value for OS and EFS, respectively. The WBC was stratified by 30×109/L. In the genetic-based risk stratification, BCR::ABL1, ETV6::RUNX1, DUX4::IGH and TCF3::PBX1 were categorized into standard risk subgroup, otherwise were into poor risk subgroup.

library(dplyr)
library(openxlsx)
library(survival)
library(survminer)

#-------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#-------------------------------------------------------------------------------
tt <- readxl::read_excel("raw_data/Sample_RNA_190_fusion_mutation_v1130.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(mutation,fusion,RNA_id,Prediction_Check)

M_label <- data.table::fread("result/Figure4/4B_cluster_B.txt") %>%
  as.data.frame() %>%
  dplyr::mutate(Clusters=ifelse(Clusters %in% c("BC1","BC2"),"BC1+BC2",Clusters)) %>%
  dplyr::mutate(Clusters=as.factor(Clusters)) %>%
  dplyr::select(Clusters,Pid)

##### new survival #####
clinical_survival <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::rename("OS"="OS (days)","EFS"="EFS (days)","RFS"="RFS (days)","ifHSCT"="HSCT_stat",
         "WBC"="WBC (10⁹/L)","Age"="Age (yrs)","Pid"="PID","RNA_id_raw"="RNA_ID_raw") %>%
  dplyr::select(Pid,RNA_id_raw,Age,Sex,WBC,ifHSCT,OS,OS_stat,EFS,EFS_stat,RFS,RFS_stat,D28_status) %>%
  dplyr::mutate(WBC_type= ifelse(WBC>30, "high",
                                 ifelse(WBC<=30,"low",NA))) %>%
  dplyr::mutate(ifHSCT=as.factor(ifHSCT)) %>%
  dplyr::inner_join(M_label,by="Pid") %>%
  dplyr::left_join(tt,by=c("RNA_id_raw"="RNA_id")) %>%
  dplyr::mutate(NCCN=ifelse(fusion %in% c("ETV6-RUNX1","DUX4-IGH","TCF3-PBX1"),"Standard risk",
                            ifelse(fusion %in% c("KMT2A-AFF1","EP300-ZNF384","SMARCA2-ZNF384"),"Poor risk",
                                   ifelse(Prediction_Check=="Ph-like","Poor risk",
                                          ifelse(Prediction_Check %in% c("BCL2/MYC","PAX5alt","High hyperdoploid", "Low hypodiploid","IDH","TWIST2-high"),"Poor risk","Standard risk"))))) %>%
  dplyr::mutate(NCCN=factor(NCCN,levels=c("Standard risk","Poor risk"))) %>%
  dplyr::mutate(Sex=factor(Sex,levels=c("M","F"))) %>%
  dplyr::mutate(Age=ifelse(Age>40,"old","young")) %>%
  dplyr::mutate(Age=factor(Age,levels=c("young","old")))

var.names <- c("Clusters","WBC_type","Age","NCCN","D28_status","ifHSCT")

#-------------------------------------------------------------------------------
#  Step 2: Event-free survival (EFS)
#-------------------------------------------------------------------------------
f_efs <- as.formula(paste('Surv(EFS, EFS_stat)~',paste(var.names,collapse = "+")))
fit.final <- coxph(f_efs,data=clinical_survival)
p_efs <- ggforest(fit.final,main="EFS Hzard Ratio",
                  cpositions=c(0.02,0.22,0.4),
                  fontsize=0.8,refLabel="reference",
                  noDigits=2,data=clinical_survival)

#-------------------------------------------------------------------------------
#  Step 3: Overall survival(OS)
#-------------------------------------------------------------------------------
f_os <- as.formula(paste('Surv(OS, OS_stat)~',paste(var.names,collapse = "+")))
fit.os <- coxph(f_os,data=clinical_survival)
p_os <- ggforest(fit.os,main="OS Hzard Ratio",
                 cpositions=c(0.02,0.22,0.4),
                 fontsize=0.8,refLabel="reference",
                 noDigits=2,data=clinical_survival)

ggsave(p_efs,filename = "result/Figure4/4D_HR_EFS_B_NCCN_1plus2vs3.pdf",width=9,height=4.5)
ggsave(p_os,filename = "result/Figure4/4D_HR_OS_B_NCCN_1plus2vs3.pdf",width=9,height=4.5)

5.5 (E) Subnetwork between BC3 and BC1/2

A core metabolism-based subnetwork that best explains the difference between BC3 and BC1/2. In this network, each circular node represents a metabolite, labeled with its KEGG ID, and each square node represents a gene. Node colors indicate logFC values: red and green signify different gene expression levels, while yellow and blue signify varying metabolite levels. The highlighted labels correspond to pathways in the cholesterol metabolism and nicotinate and nicotinamide metabolism.

library(dplyr)
library(ggplot2)
library(igraph)
library(MNet)
library(graphics)
library(cowplot)

rm(list = ls())

#-------------------------------------------------------------------------------
#  Step 1: Read data and set parameters
#-------------------------------------------------------------------------------

gene1 <- readxl::read_excel("raw_data/Chol_Transport_Deg_Ester.xlsx") %>%
  as.data.frame()

name_gene <- data.table::fread("result/Figure4/4A_1.cox_gene_all_B.txt") %>%
  as.data.frame()

diff_gene <- data.table::fread("result/Figure4/S5H_gene_B.txt") %>%
  as.data.frame() %>%
  filter(name %in% name_gene$name_raw)

kid <- openxlsx::read.xlsx("raw_data/cell_metabolite_info_all_v1109.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(refmet_name,KEGG) %>%
  mutate(KEGG=ifelse(KEGG=="None",refmet_name,KEGG))

diff_meta <- data.table::fread("result/Figure4/S5H_metabolite_B.txt") %>%
  as.data.frame() %>%
  inner_join(kid,by=c("name"="refmet_name")) %>%
  dplyr::select(-name) %>%
  dplyr::rename("name"="KEGG") %>%
  arrange(P.Value) %>%
  distinct(name,.keep_all = T)

names(diff_meta)[4]  <- "p_value"

names(diff_gene)[4] <- "p_value"

dd <- data.table::fread("raw_data/subnetwork_important_edge-300_B.txt") %>%
  as.data.frame()

set.seed(1)
g <- graph_from_data_frame(dd)
layout <- layout_with_dh(g,set.seed(1))  
row.names(layout) <- names(V(g))
colnames(layout) <- c("pos.x","pos.y")

kk <- PathwayExtendData %>%
  filter(kegg_pathwayname=="Nicotinate and nicotinamide metabolism")

node.data <- as.data.frame(layout) %>%
  tibble::rownames_to_column(var="label") %>%
  left_join(diff_gene,by=c("label"="name")) %>%
  left_join(diff_meta,by=c("label"="name")) %>%
  dplyr::select(label,pos.x,pos.y,logFC.x,logFC.y) %>%
  mutate(logFC=ifelse(is.na(logFC.x),logFC.y,logFC.x)) %>%
  mutate(type=ifelse(is.na(logFC.x),"metabolite","gene")) %>%
  dplyr::select(-logFC.x,-logFC.y) %>%
  mutate(label1=ifelse(label %in% gene1$`Gene Name` |label %in% kk$name| type=="metabolite",label,NA))

ab <- node.data %>%
  filter(!is.na(label1)) %>%
  left_join(gene1,by=c("label1"="Gene Name"))
write.table(ab,"result/Figure4/4E_node-filter.txt",quote=F,row.names = F,sep="\t")

edge.data <- dd %>%
  dplyr::rename("fromNode"="X1",
         "toNode"="X2") %>%
  dplyr::select(fromNode,toNode) %>%
  dplyr::left_join(node.data,by=c("fromNode"="label")) %>%
  dplyr::rename("from.x"="pos.x","from.y"="pos.y") %>%
  dplyr::left_join(node.data,by=c("toNode"="label")) %>%
  dplyr::rename("to.x"="pos.x","to.y"="pos.y")

#-------------------------------------------------------------------------------
#  Step 2: Core metabolism-based subnetwork
#-------------------------------------------------------------------------------
## node.color
source("raw_data/node.color.R")
source("raw_data/col.key.R")
source("raw_data/colorpanel2.R")

node_meta <- node.data %>%
  filter(type=="metabolite") %>%
  mutate(logFC=round(logFC,2)) %>%
  mutate(logFC=ifelse(logFC < -0.58, -0.58, logFC))
meta_limits=as.numeric(sprintf("%.1f", max(max(node_meta$logFC),abs(min(node_meta$logFC)))))+0.1
meta_color <- node.color(limit=meta_limits,node_meta$logFC,low="#007DDB", mid="gray", high="yellow")
node_meta$color <- meta_color

node_gene <- node.data %>%
  filter(type=="gene") %>%
  mutate(logFC=round(logFC,2)) %>%
  mutate(logFC=ifelse(logFC < -2, -2, logFC)) %>%
  mutate(logFC=ifelse(logFC > 2, 2, logFC))
gene_limits=as.numeric(sprintf("%.1f", max(max(node_gene$logFC),abs(min(node_gene$logFC)))))+0.1
gene_color <- node.color(limit=gene_limits,node_gene$logFC,low="#4FBD81",mid="gray",high="#D01910")
node_gene$color <- gene_color

node.data1 <- rbind(node_meta,node_gene)

aa=node.data1$color
names(aa)=node.data1$color

gg <- ggplot()
gg <- gg + geom_segment(mapping = aes(x = from.x, y = from.y, xend = to.x, yend = to.y),
                        color = "#CCCCCC", size = 0.5, data = edge.data) 
gg <- gg + geom_point(mapping = aes(x = pos.x, y = pos.y,color=color,shape=type), 
                      size = 4, data = node.data1) 
gg <- gg + scale_size(range = c(0, 6) * 2) 
gg <- gg + theme_void()+theme(legend.position="none")
gg <- gg + labs(x = "", y = "")
gg <- gg + ggrepel::geom_text_repel(mapping=aes(x = pos.x, y = pos.y,label=label1),data=node.data1)
gg <- gg + scale_colour_manual(values = aa)+scale_shape_manual(values=c("gene"="square","metabolite"="circle"))

p <- plot.new()
off.sets=col.key(limit=gene_limits,bins=10,cex=0.5,graph.size = c(1,1),off.sets=c(x=0,y=0),
                 low="#4FBD81",mid="gray",high="#D01910")
off.sets=col.key(limit=meta_limits,bins=10,cex=0.5,low="#007DDB", mid="gray", high="yellow",
                   off.sets=c(x=0,y=0),graph.size = c(1,0.9))
p <- recordPlot()
dev.off()

p1 <- ggdraw()+
  draw_plot(p,0.5,0.5,0.5,0.55)+
  draw_plot(gg,0,0,1,1)

ggsave("result/Figure4/4E_node-300-all-B.pdf",p1,width=6,height = 6)

if (0) {
  png("result/Figure4/4E_node-300-aa.png",width=5,height = 5,units = 'in', res = 200)
  grid::grid.draw(p1)
  dev.off()
}

5.6 (F) Discrimination of T-ALL Clusters

The discrimination of two T-ALL clusters (TC1 and TC2) based on SNF method.

library(dplyr)
library(MNet)
library(survival)
library(survminer)

#-------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#-------------------------------------------------------------------------------
dat_filter <- data.table::fread("result/Figure2/1.BMMCvsHC.txt") %>%
  as.data.frame() %>%
  dplyr::arrange(P.Value) %>%
  dplyr::filter(P.Value < 0.5)
dat_filter$P.Value[1:5]

dat_cox <- data.table::fread("result/Figure4/4A_1.cox_metabolite_all_T.txt") %>%
 as.data.frame() %>%
  dplyr::arrange(p.value) %>%
  head(n=120)
dat_cox$p.value[1:5]
dat_filter <- dat_filter %>%
  dplyr::filter(name %in% dat_cox$name_raw)

sample_info <- readxl::read_excel("raw_data/sample_clinical_201_info.xlsx") %>%
  as.data.frame() %>%
  dplyr::filter(!is.na(METc_BM_D0_ID)) %>%
  dplyr::filter(!is.na(RNA_ID)) %>%
  dplyr::filter(Lineage=="T")

dat_cell <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  as.data.frame() %>%
  dplyr::filter(label %in% dat_filter$name) %>%
  tibble::column_to_rownames("label") %>%
  dplyr::select(all_of(sample_info$METc_BM_D0_ID)) %>%
  log1p()

names(dat_cell) <- sample_info$Pid

cox_gene <- data.table::fread("result/Figure4/4A_1.cox_gene_all_T.txt") %>%
  as.data.frame() %>%
  dplyr::arrange(p.value) %>%
  head(n=600)

dat_gene <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(sample_info$RNA_id_raw) %>%
  tibble::rownames_to_column(var="label") %>%
  dplyr::filter(label %in% cox_gene$name_raw) %>%
  tibble::column_to_rownames("label")

names(dat_gene) <- sample_info$Pid

#-------------------------------------------------------------------------------
#  Step 2: SNF of discrimination T-ALL clusters
#-------------------------------------------------------------------------------
library(SNFtool)
metabolite_norm = standardNormalization(t(dat_cell))
metgene_norm = standardNormalization(t(dat_gene))

dim(metabolite_norm)
dim(metgene_norm)

write.table(metabolite_norm,"result/Figure4/4F_metabolite_norm_T.txt",quote=F,sep="\t")
write.table(metgene_norm,"result/Figure4/4F_metgene_norm_T.txt",quote=F,sep="\t")

Dist1 = (SNFtool::dist2(as.matrix(metabolite_norm),as.matrix(metabolite_norm)))^(1/2)
Dist2 = (SNFtool::dist2(as.matrix(metgene_norm),as.matrix(metgene_norm)))^(1/2)
W1 = affinityMatrix(Dist1, 10, 0.5)
W2 = affinityMatrix(Dist2, 10, 0.5)
W = SNF(list(W1,W2), 10, 10)

Clusters = spectralClustering(W,K=2)
M_label=cbind(Clusters,sample_info) %>%
  dplyr::mutate(Clusters=ifelse(Clusters==1,"TC2",
                                ifelse(Clusters==2,"TC1",Clusters)))
write.table(M_label,paste0("result/Figure4/4F_cluster_T.txt"),quote=F,row.names = F,sep="\t")

l <- M_label %>%
  dplyr::arrange(Clusters)

W_2 <- W %>%
  as.data.frame() %>%
  dplyr::select(l$Pid) %>%
  tibble::rownames_to_column(var="sample") %>%
  dplyr::arrange(match(sample,l$Pid)) %>%
  tibble::column_to_rownames("sample")
W_2[W_2==0.5] <- 0

col_fun = circlize::colorRamp2(c( 0, 0.03), c( "white","#F48326"))

library(ComplexHeatmap)
p_metabolite <- Heatmap(W_2,height=unit(10,"cm"),name="Sample Data",
                        cluster_rows = F,cluster_columns = F,
                        col = col_fun,
                        #top_annotation =top_annotation,
                        show_column_names = F,show_row_names = F,
                        row_names_gp = gpar(fontsize = 3),column_names_gp = gpar(fontsize = 3))

pdf("result/Figure4/4F_SNF_discrimination_complexheatmap_T.pdf",width=5,height = 5)
p_metabolite
dev.off()

5.7 (G) OS of TC1/2

The association of the two metabolic clusters with clinical OS outcomes in 24 T-ALL patients. The result suggested TC1 was of significantly poorer prognosis than TC2.

library(dplyr)
library(MNet)
library(survival)
library(survminer)

#------------------------------------------------------------------------------
#  OS of TC1/2
#------------------------------------------------------------------------------
##### new survival #####
survival_new <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::select(PID,OS_stat,`OS (days)`) %>%
  dplyr::rename("OS"="OS (days)")

clinical_survival <- data.table::fread(paste0("result/Figure4/4F_cluster_T.txt")) %>%
  as.data.frame() %>%
  dplyr::select(-OS_stat,-OS,-EFS_stat,-EFS,-RFS_stat,-RFS) %>%
  left_join(survival_new,by=c("Pid"="PID"))

surv_object <- Surv(time = clinical_survival$OS/30, event = clinical_survival$OS_stat)
fit_cluster <- survfit(surv_object ~ Clusters, data = clinical_survival)
p1 <- survminer::ggsurvplot(fit_cluster, data = clinical_survival, 
                            title="OS",
                            palette=c("#9BD53F","#A6CEE3"),
                            tables.theme = theme_void(),risk.table = TRUE,
                            risk.table.fontsize=6,
                            break.time.by = 12,
                            risk.table.y.text = FALSE, 
                            tables.height = 0.15,risk.table.col = "black",
                            pval = TRUE,
                            pval.method = F,pval.coord=c(0.5,0.1),pval.size=6,
                            font.tickslab=c(14,"plain","black"),
                            xlab="Time (Months) ",ylab="OS Survival")

pdf(paste0("result/Figure4/4G_OS_T.pdf"),onefile=FALSE)
p1
dev.off()

5.8 (H) Subnetwork between TC2 and TC1

A core metabolism-based subnetwork that best explains the difference between TC2 and TC1. The highlighted labels correspond to pathways in the TCA and OXPHOS. PC refers to a group of phosphatidylcholines without specific KEGG IDs.

library(dplyr)
library(ggplot2)
library(igraph)
library(MNet)
library(graphics)
library(cowplot)

#-------------------------------------------------------------------------------
#  Step 1: Read data and set parameters
#-------------------------------------------------------------------------------

rm(list = ls())
name_gene <- data.table::fread("result/Figure4/4A_1.cox_gene_all_T.txt") %>%
  as.data.frame()

diff_gene <- data.table::fread("result/Figure4/S6J_gene_T.txt") %>%
  as.data.frame() %>%
  filter(name %in% name_gene$name_raw)

kid <- openxlsx::read.xlsx("raw_data/cell_metabolite_info_all_v1109.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(refmet_name,KEGG) %>%
  mutate(KEGG=ifelse(KEGG=="None",refmet_name,KEGG))

diff_meta <- data.table::fread("result/Figure4/S6J_metabolite_T.txt") %>%
  as.data.frame() %>%
  inner_join(kid,by=c("name"="refmet_name")) %>%
  dplyr::select(-name) %>%
  dplyr::rename("name"="KEGG") %>%
  arrange(P.Value) %>%
  distinct(name,.keep_all = T)

names(diff_meta)[4]  <- "p_value"

names(diff_gene)[4] <- "p_value"

dd <- data.table::fread("raw_data/subnetwork_important_edge-300_T.txt") %>%
  as.data.frame()

set.seed(2)
g <- graph_from_data_frame(dd)
layout <- layout_with_dh(g,set.seed(1))  
row.names(layout) <- names(V(g))
colnames(layout) <- c("pos.x","pos.y")

kk <- PathwayExtendData %>%
  filter(kegg_pathwayname %in% c("Citrate cycle (TCA cycle)","Oxidative phosphorylation"))

node.data <- as.data.frame(layout) %>%
  tibble::rownames_to_column(var="label") %>%
  left_join(diff_gene,by=c("label"="name")) %>%
  left_join(diff_meta,by=c("label"="name")) %>%
  dplyr::select(label,pos.x,pos.y,logFC.x,logFC.y) %>%
  mutate(logFC=ifelse(is.na(logFC.x),logFC.y,logFC.x)) %>%
  mutate(type=ifelse(is.na(logFC.x),"metabolite","gene")) %>%
  dplyr::select(-logFC.x,-logFC.y) %>%
  mutate(label1=ifelse(label %in% kk$name,label,NA))

edge.data <- dd %>%
  dplyr::rename("fromNode"="X1",
         "toNode"="X2") %>%
  dplyr::select(fromNode,toNode) %>%
  dplyr::left_join(node.data,by=c("fromNode"="label")) %>%
  dplyr::rename("from.x"="pos.x","from.y"="pos.y") %>%
  dplyr::left_join(node.data,by=c("toNode"="label")) %>%
  dplyr::rename("to.x"="pos.x","to.y"="pos.y")

## node.color
source("raw_data/node.color.R")
source("raw_data/col.key.R")
source("raw_data/colorpanel2.R")

node_meta <- node.data %>%
  filter(type=="metabolite") %>%
  mutate(logFC=round(logFC,2)) %>%
  mutate(logFC=ifelse(logFC < -0.58, -0.58, logFC))
meta_limits=as.numeric(sprintf("%.1f", max(max(node_meta$logFC),abs(min(node_meta$logFC)))))+0.1
meta_color <- node.color(limit=meta_limits,node_meta$logFC,low="#007DDB", mid="gray", high="yellow")
node_meta$color <- meta_color

node_gene <- node.data %>%
  filter(type=="gene") %>%
  mutate(logFC=round(logFC,2)) %>%
  mutate(logFC=ifelse(logFC < -2, -2, logFC)) %>%
  mutate(logFC=ifelse(logFC > 2, 2, logFC))
gene_limits=as.numeric(sprintf("%.1f", max(max(node_gene$logFC),abs(min(node_gene$logFC)))))+0.1
gene_color <- node.color(limit=gene_limits,node_gene$logFC,low="#4FBD81",mid="gray",high="#D01910")
node_gene$color <- gene_color

node.data1 <- rbind(node_meta,node_gene)

aa=node.data1$color
names(aa)=node.data1$color

gg <- ggplot()
gg <- gg + geom_segment(mapping = aes(x = from.x, y = from.y, xend = to.x, yend = to.y),
                        color = "#CCCCCC", size = 0.5, data = edge.data) 
gg <- gg + geom_point(mapping = aes(x = pos.x, y = pos.y,color=color,shape=type), 
                      size = 4, data = node.data1) 
gg <- gg + scale_size(range = c(0, 6) * 2) 
gg <- gg + theme_void()+theme(legend.position="none")
gg <- gg + labs(x = "", y = "")
gg <- gg + ggrepel::geom_text_repel(mapping=aes(x = pos.x, y = pos.y,label=label1),data=node.data1)
gg <- gg + scale_colour_manual(values = aa)+scale_shape_manual(values=c("gene"="square","metabolite"="circle"))

pdf("result/Figure4/4H_node-200-1_T.pdf")
p <- plot.new()
off.sets=col.key(limit=gene_limits,bins=10,cex=0.5,graph.size = c(1,1),off.sets=c(x=0,y=0),
                 low="#4FBD81",mid="gray",high="#D01910")
off.sets=col.key(limit=meta_limits,bins=10,cex=0.5,low="#007DDB", mid="gray", high="yellow",
                   off.sets=c(x=0,y=0),graph.size = c(1,0.9))
p <- recordPlot()
dev.off()

if (0) {
p1 <- ggdraw()+
  draw_plot(p,0.5,0.5,0.5,0.55)+
  draw_plot(gg,0,0,1,1)
ggsave("result/Figure4/4H_node-300-all.pdf",p1,width=6,height = 6)
}

ggsave("result/Figure4/4H_node-300-2_T.pdf",gg,width=6,height = 6)


if (0) {
  png("result/Figure4/4H_node-300-aa.png",width=5,height = 5,units = 'in', res = 200)
  grid::grid.draw(p1)
  dev.off()
}

5.9 (S5 A) Clusters of BCP-ALL

The relative abundance of input metabolites and expression level of metabolic genes in the three metabolic clusters in BCP-ALL: BC1 (N=20), BC2 (N=35), and BC3 (N=20). Each column represents a sample and each row represents a metabolite (upper panel) or gene included (lower panel).

#------------------------------------------------------------------------------
#  Step 1: Heatmap
#------------------------------------------------------------------------------
library(dplyr)
library(ComplexHeatmap)

sample_info <- data.table::fread("result/Figure4/4B_cluster_B.txt") %>%
  as.data.frame()

metabolite_norm = data.table::fread("result/Figure4/4B_metabolite_norm_B.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("V1")

metgene_norm <- data.table::fread("result/Figure4/4B_metgene_norm_B.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("V1")

top_annotation = HeatmapAnnotation(Subtype=sample_info$Clusters,
  col=list(Subtype=c("BC1"="#9BD53F","BC2"="#A6CEE3","BC3"="#EA644C")),
  show_annotation_name = c(T,T), 
  border = c(T,T) ,
  simple_anno_size = unit(4, "mm"),gap = unit(1, "mm"))

col_fun = circlize::colorRamp2(c(-1, 0, 1), c("#00599F", "white", "#D01910"))

p_metabolite <- Heatmap(t(metabolite_norm),height=unit(10,"cm"),name="Metabolite Data",
                        cluster_row_slices = F,clustering_method_rows = "ward.D",
                        clustering_method_columns = "ward.D2",
                        column_split = sample_info$Clusters,
                        col = col_fun,
                        top_annotation =top_annotation,
                        show_column_names = F,show_row_names = F,
                        row_names_gp = gpar(fontsize = 3),
                        column_names_gp = gpar(fontsize = 3))

col_fun = circlize::colorRamp2(c(-1, 0, 1), c("#00599F", "white", "#FF7F00"))

p_gene <- Heatmap(t(metgene_norm),height=unit(10,"cm"),name="Gene Data",
                  cluster_row_slices = F,clustering_method_rows = "ward.D",
                  clustering_method_columns = "ward.D2",
                  column_split = sample_info$Clusters,
                  col = col_fun,
                  show_column_names = F,show_row_names = F,
                  row_names_gp = gpar(fontsize = 3),
                  column_names_gp = gpar(fontsize = 3))

pdf(paste0("result/Figure4/S5A_heatmap_B.pdf"),width=8,height = 10)
p_metabolite %v% p_gene
dev.off()

5.10 (S5 G) Waterfall plot of BC1/2/3

The associations of metabolic clusters with clinical variables and genetic alterations, including major mutations and fusions in BCP-ALL. Note: Among all fusions, only BCR::ABL1 and TCF3::PBX1 were significantly enriched in BC1/2 and BC3, respectively. P-value was calculated by Pearson’s chi-square test or Fisher’s exact test.

library(dplyr)
library(ComplexHeatmap)

#-------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#-------------------------------------------------------------------------------

clinical_survival <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::rename("OS"="OS (days)","EFS"="EFS (days)","RFS"="RFS (days)","ifHSCT"="HSCT_stat",
         "WBC"="WBC (10⁹/L)","Age"="Age (yrs)","Pid"="PID","RNA_id_raw"="RNA_ID_raw") %>%
  dplyr::select(Pid,RNA_id_raw,Age,Sex,WBC,ifHSCT,OS_stat,D28_status) %>%
  #dplyr::mutate(Age=ifelse(Age < 40,"< 40",">= 40")) %>%
  dplyr::mutate(WBC_type= ifelse(WBC>30, "> 30",
                                 ifelse(WBC<=30,"<= 30",NA))) %>%
  dplyr::mutate(Outcome=ifelse(OS_stat==1,"Dead",
                               ifelse(OS_stat==0,"Alive",OS_stat))) %>%
  dplyr::mutate(ifHSCT=ifelse(ifHSCT==0,"no",
                              ifelse(ifHSCT==1,"yes",ifHSCT)))
  
sample_info_raw <- data.table::fread("raw_data/1.cluster_B.txt") %>%
  as.data.frame() %>%
  dplyr::select(bianhao,Clusters)

sample_info <- data.table::fread("result/Figure4/4B_cluster_B.txt") %>%
  as.data.frame() %>%
  dplyr::select(Clusters,Pid) %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::arrange(match(bianhao,sample_info_raw$bianhao)) %>%
  dplyr::left_join(clinical_survival,by="Pid") %>%
  dplyr::rename("RNA_id"="RNA_id_raw")

#-------------------------------------------------------------------------------
#  Step 2: Mutations
#-------------------------------------------------------------------------------
q12 <- c("#1F78B4","#E31A1C","black")
names(q12) <- unique(sample_info$Clusters)

top_annotation = HeatmapAnnotation(
  Subtype=sample_info$Clusters,
  #Age=sample_info$Age,
  ifHSCT=sample_info$ifHSCT,
  WBC=sample_info$WBC_type,
  Outcome=sample_info$Outcome,
  col=list(Subtype=c("BC1"="#9BD53F","BC2"="#A6CEE3","BC3"="#EA644C"),
           Age=c("< 40"="#B2DF8A",">= 40"="gray"),
           ifHSCT=c("yes"="#CAB2D6","no"="gray"),
           WBC=c("> 30"="#FDBF6F","<= 30"="gray"),
           Outcome=c("Dead"="black","Alive"="gray")),
  show_annotation_name = c(T,T), 
  border = c(T,T,T,T,T,T) ,
  simple_anno_size = unit(8, "mm"),gap = unit(1.5, "mm"))

mutation_B <- data.table::fread("raw_data/mutation_B_v1114_all.txt") %>%
  as.data.frame() %>%
  dplyr::select(V2,V17,V19) %>%
  dplyr::rename("ID"="V2","Gene.refGene"="V17","ExonicFunc.refGene"="V19") %>%
  dplyr::select(ID,`Gene.refGene`,`ExonicFunc.refGene`) %>%
  dplyr::mutate(`ExonicFunc.refGene`=
                  ifelse(`ExonicFunc.refGene` %in% c("frameshift deletion","frameshift insertion","frameshift substitution"),"Frameshift",
                         ifelse(`ExonicFunc.refGene` %in% c("nonframeshift deletion","nonframeshift insertion","nonframeshift substitution"),"Nonframeshift",
                                ifelse(`ExonicFunc.refGene` %in% ".","Splice",
                                       ifelse(`ExonicFunc.refGene` %in% "nonsynonymous SNV","Missense",
                                              ifelse(`ExonicFunc.refGene`=="startloss","Startloss",
                                                     ifelse(`ExonicFunc.refGene`=="stopgain","Stopgain",
                                                            ifelse(`ExonicFunc.refGene`=="synonymous SNV","Silent",`ExonicFunc.refGene`)))))))) %>%
  unique() %>%
  dplyr::filter(ID %in% sample_info$RNA_id) %>%
  dplyr::mutate(`Gene.refGene` = ifelse(`Gene.refGene` %in% c("IDH1","IDH2"),"IDH1/2",`Gene.refGene`))

mutation <- rbind(mutation_B)

hit_num <- mutation %>%
  dplyr::group_by(ID,`Gene.refGene`) %>%
  dplyr::summarise(n=n()) %>%
  dplyr::filter(n>1)

mutation <- mutation %>% 
  dplyr::left_join(hit_num,by=c("ID","Gene.refGene")) %>%
  dplyr::mutate(`ExonicFunc.refGene`=ifelse(!is.na(n),"Multiple",`ExonicFunc.refGene`)) %>%
  dplyr::select(-n) %>%
  unique() %>%
  dplyr::rename("RNA_id"="ID") %>%
  dplyr::filter(RNA_id %in% sample_info$RNA_id)

mutation_gene_filter <- mutation %>%
  dplyr::select(RNA_id,Gene.refGene) %>%
  unique() %>%
  dplyr::group_by(`Gene.refGene`) %>%
  dplyr::summarise(n=n()) %>%
  dplyr::filter(n>0)
mutation_sort <- rev(names(sort(table(mutation %>% filter(`Gene.refGene` %in% mutation_gene_filter$Gene.refGene) %>% pull(Gene.refGene)))))

mutation <- mutation %>%
  dplyr::filter(`Gene.refGene` %in% mutation_gene_filter$Gene.refGene) %>%
  as.data.frame() %>%
  dplyr::distinct(RNA_id,Gene.refGene,.keep_all = T)

sample_info_filter <- sample_info %>%
  dplyr::select(bianhao,RNA_id)

mutation_dcast <- reshape2::dcast(mutation,RNA_id ~ `Gene.refGene`) %>%
  dplyr::right_join(sample_info_filter,by="RNA_id") %>%
  dplyr::select(-RNA_id) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::select(all_of(mutation_sort))

mutation_dcast[is.na(mutation_dcast)] <- 0
mutation_dcast[mutation_dcast=="Missense"] <- 1
mutation_dcast[mutation_dcast=="Frameshift"] <- 2
mutation_dcast[mutation_dcast=="Nonframeshift"] <- 3
mutation_dcast[mutation_dcast=="Silent"] <- 4
mutation_dcast[mutation_dcast=="Splice"] <- 5
mutation_dcast[mutation_dcast=="Stopgain"] <- 7
mutation_dcast[mutation_dcast=="Multiple"] <- 8

mutation_dcast1 <- mutation_dcast %>%
  dplyr::mutate_if(is.character,as.numeric) %>%
  rowwise() %>%
  dplyr::mutate(Signaling=sum(NRAS,KRAS,FLT3,STAT5B,NF1,MYC)) %>%
  dplyr::mutate(Transcription=sum(PAX5,TP53,RUNX1,IKZF1,ETV6,MGA)) %>%
  dplyr::mutate(Epigenetic=sum(KMT2D,SETD2,NCOR2,EP300,ARID1A,ARID1B,SETD1B,`IDH1/2`)) %>%
  dplyr::mutate(Others=sum(SACS)) %>%
  dplyr::mutate(Signaling=ifelse(Signaling==0,0,9)) %>%
  dplyr::mutate(Transcription=ifelse(Transcription==0,0,9)) %>%
  dplyr::mutate(Epigenetic=ifelse(Epigenetic==0,0,9)) %>%
  dplyr::mutate(Others=ifelse(Others==0,0,9)) %>%
  dplyr::select(NRAS,FLT3,STAT5B,NF1,KRAS,MYC,Signaling,
                PAX5,TP53,RUNX1,IKZF1,ETV6,MGA,Transcription,
                KMT2D,SETD2,NCOR2,EP300,ARID1A,ARID1B,SETD1B,`IDH1/2`,Epigenetic,
                SACS,Others) %>%
  as.data.frame()

rownames(mutation_dcast1) <- rownames(mutation_dcast)
mutation_dcast <- mutation_dcast1

mutation_dcast_temp <- mutation_dcast
mutation_dcast_temp[mutation_dcast_temp!=0] <- 1
num=colSums(mutation_dcast_temp)
names(mutation_dcast) <- paste0(names(mutation_dcast)," (n=",num,")")

d_mutation <- t(mutation_dcast) %>%
  as.data.frame() %>%
  dplyr::select(sample_info$bianhao)

mutation_annotation <- Heatmap(d_mutation,height=unit(12,"cm"),rect_gp = gpar(col = "gray", lwd = 2),
                               col=c("#EEEEEE","#3987CC","#DB3D3D","#663FFB","#FF7F0E","#CC66FF","#8B564C","#339933","blue"),
                               cluster_columns = F,cluster_rows = F,border = TRUE,column_title = NULL,
                               column_split =sample_info$Clusters,
                               show_column_names = TRUE,
                               top_annotation=top_annotation,
                               row_names_gp = gpar(fontsize = 10),column_names_gp=gpar(fontsize=6))

#-------------------------------------------------------------------------------
#  Step 3: Fusions
#-------------------------------------------------------------------------------
fusion_B <- readxl::read_excel("raw_data/fusion_B_v0803.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::filter(RNA_id %in% sample_info$RNA_id) %>%
  dplyr::select(RNA_id,fusion_group) %>%
  dplyr::mutate(fusion_group=ifelse(fusion_group %in% c("PAX5-ETV6","PAX5-CPLX1"),"PAX5 fusion",
                                    fusion_group)) %>%
  dplyr::mutate(fusion_group=ifelse(fusion_group=="BCR-ABL1;IKZF1-IGH","BCR-ABL1",fusion_group)) %>%
  dplyr::left_join(sample_info,by="RNA_id") %>%
  dplyr::filter(fusion_group %in% c("BCL2/MYC","BCR-ABL1","DUX4 rearrangement","ETV6-RUNX1","KMT2A rearrangement","PAX5 fusion","TCF3-PBX1","ZNF384 fusion")) %>%
  dplyr::select(RNA_id,fusion_group) %>%
  dplyr::filter(!is.na(fusion_group))

fusion <- rbind(fusion_B)

clinical_fusion_dcast <- reshape2::dcast(fusion,RNA_id ~ fusion_group) %>%
  dplyr::right_join(sample_info_filter,by="RNA_id") %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::select(-RNA_id)

clinical_fusion_dcast[is.na(clinical_fusion_dcast)] <- 0
clinical_fusion_dcast[clinical_fusion_dcast != 0] <- 1

d_fusion <- t(clinical_fusion_dcast) %>%
  as.data.frame() %>%
  dplyr::select(sample_info$bianhao) %>%
  tibble::rownames_to_column(var="fusion") %>%
  dplyr::arrange(match(fusion,c("DUX4 rearrangement","ZNF384 fusion","TCF3-PBX1","KMT2A rearrangement","ETV6-RUNX1","PAX5-ETV6","PAX5 fusion","BCL2/MYC"))) %>%
  tibble::column_to_rownames("fusion")

fusion_annotation <- Heatmap(d_fusion,rect_gp = gpar(col = "gray", lwd = 2),
                             col=c("#EEEEEE","#444444"),
                             height=unit(4,"cm"),border = TRUE,
                             column_split =sample_info$Clusters,
                             cluster_columns = F,cluster_rows = F,
                             column_title = NULL,
                             row_names_gp = gpar(fontsize = 10),
                             column_names_gp=gpar(fontsize=6))

pdf(paste0("result/Figure4/S5G_oncoplot_B.pdf"),width=10,height = 10)
mutation_annotation %v% fusion_annotation
dev.off()

5.11 (S5 H) DA score of BC3 cluster

The DA score reveals that most of the disturbed metabolism pathways are significantly enriched in the BC3 cluster.

library(ggplot2)
library(MNet)
library(dplyr)

#------------------------------------------------------------------------------
#  Stpe 1: mlimma of BCP-ALL clusters
#------------------------------------------------------------------------------
cluster <- data.table::fread("result/Figure4/4B_cluster_B.txt") %>%
  as.data.frame() 
dat <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  dplyr::select(label,all_of(cluster$METc_BM_D0_ID)) %>%
  tibble::column_to_rownames("label")
group <- cluster$Clusters

group[which(group %in% c("BC3"))] <- "tumor"
group[which(group %in%  c("BC1","BC2"))] <- "normal"
table(group)
metabolite_all <- mlimma(log1p(dat),group)
write.table(metabolite_all,"result/Figure4/S5H_metabolite_B.txt",quote=F,row.names=F,sep="\t")

cluster <- data.table::fread("result/Figure4/4B_cluster_B.txt") %>%
  as.data.frame()

dat <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(all_of(cluster$RNA_id_raw))
group <- cluster$Clusters

group[which(group %in% c("BC3"))] <- "tumor"
group[which(group %in%  c("BC1","BC2"))] <- "normal"
table(group)

gene_all <- mlimma(dat,group)
 
write.table(gene_all,"result/Figure4/S5H_gene_B.txt",quote=F,row.names=F,sep="\t")

#------------------------------------------------------------------------------
#  Step 2: DA-score of metabolites
#------------------------------------------------------------------------------
kegg_id <- readxl::read_excel("raw_data/cell_metabolite_info_all_v1109.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(refmet_name,KEGG) %>%
  tidyr::separate_rows(KEGG,sep=";") %>%
  dplyr::filter(KEGG!="NA") %>%
  dplyr::filter(KEGG != "None") %>%
  as.data.frame()

dat <- data.table::fread("result/Figure4/S5H_metabolite_B.txt") %>%
  dplyr::arrange(desc(logFC)) %>%
  dplyr::inner_join(kegg_id,by=c("name"="refmet_name")) %>%
  dplyr::distinct(KEGG,.keep_all = TRUE)

dat_increase <- dat %>%
  dplyr::filter(logFC > 0) %>%
  dplyr::filter(P.Value < 0.05)
dat_decrease <- dat %>%
  dplyr::filter(logFC < 0) %>%
  dplyr::filter(P.Value < 0.05)

DA_result <- DAscore(dat_increase$KEGG,dat_decrease$KEGG,dat$KEGG,min_measured_num = 3,out="metabolite")
write.table(DA_result$result,"result/Figure4/S5H_DAscore_metabolite_B.txt",quote=F,row.names=F,sep="\t")

#------------------------------------------------------------------------------
#  Step 3: DA-score of genes
#------------------------------------------------------------------------------
dat <- data.table::fread("result/Figure4/S5H_gene_B.txt")

dat_increase <- dat %>%
  dplyr::filter(logFC > 0) %>%
  dplyr::filter(P.Value < 0.05)
dat_decrease <- dat %>%
  dplyr::filter(logFC < 0) %>%
  dplyr::filter(P.Value < 0.05)

DA_result <- DAscore(dat_increase$name,dat_decrease$name,dat$name,min_measured_num = 10,out="gene")
write.table(DA_result$result,"result/Figure4/S5H_DAscore_gene_B.txt",quote=F,row.names=F,sep="\t")

#------------------------------------------------------------------------------
#  Step 4: DA-score of metabolites+genes
#------------------------------------------------------------------------------
gene <- data.table::fread("result/Figure4/S5H_DAscore_gene_B.txt") %>%
  as.data.frame() %>%
  dplyr::filter(Measured_members_num >= 10) %>%
  dplyr::filter(abs(DA_score) > 0.05)

metabolite <- data.table::fread("result/Figure4/S5H_DAscore_metabolite_B.txt") %>%
  as.data.frame() %>%
  dplyr::filter(Measured_members_num >= 3) %>%
  dplyr::filter(DA_score != 0)

all_1 <- gene %>%
  dplyr::inner_join(metabolite,by="Pathway") %>%
  dplyr::filter(DA_score.x >0 & DA_score.y >0)

all_2 <- gene %>%
  dplyr::inner_join(metabolite,by="Pathway") %>%
  dplyr::filter(DA_score.x < 0 & DA_score.y < 0)

path <- c(all_1$Pathway,all_2$Pathway,"Nicotinate and nicotinamide metabolism")

metabolite_filter <- metabolite %>%
  dplyr::filter(Pathway %in% path) %>%
  dplyr::filter(Measured_members_num >= 3) %>%
  dplyr::filter(abs(DA_score) > 0.05) %>%
  dplyr::arrange(`Pathway Category`) %>%
  dplyr::mutate(Pathway=factor(Pathway,levels = Pathway))

colp <- c("Amino acid metabolism" ="#1B9E77",
          "Carbohydrate metabolism"="#D95F02",
          "Glycan biosynthesis and metabolism"="#1F78B4", 
          "Metabolism of cofactors and vitamins"="#7570B3",
          "Metabolism of terpenoids and polyketides"="#BC80BD",
          "Metabolism of other amino acids"="#8DD3C7",
          "Energy metabolism"="#E7298A",
          "Lipid metabolism"="#66A61E",
          "Nucleotide metabolism"="#E6AB02",
          "Biosynthesis of other secondary metabolites"="#A6761D",
          "Xenobiotics biodegradation and metabolism"="#666666")

p <- ggplot2::ggplot(metabolite_filter)+
  ggplot2::geom_point(ggplot2::aes(x=Pathway,y=DA_score,size=log2(Measured_members_num),color=`Pathway Category`))+
  ggplot2::geom_pointrange(ggplot2::aes(x=Pathway,y=DA_score,ymin=0,ymax=DA_score,color=`Pathway Category`))+
  scale_color_manual(values=colp)+
  ggplot2::coord_flip()+
  ggplot2::xlab(NULL)+
  ggplot2::theme_bw()

ggsave("result/Figure4/S5H_DAscore_metabolite_B_filter.pdf",p,width=8,height = 5)

5.12 (S5 I-J) GSEA between BC3 and BC1/2

I. GSEA unveils pathways significantly enriched in BC3, mirroring the findings of the metabolome analyses.
J. GSEA unveils that cholesterol biosynthesis was significantly enriched in BC3.

#------------------------------------------------------------------------------
#  Step 1: KEGG citrate TCA cycle
#------------------------------------------------------------------------------
library(dplyr)
library(ggplot2)

## Load data and set parameters
gene_all <- data.table::fread("result/Figure4/S5H_gene_B.txt") %>%
  as.data.frame()

a <- readLines("raw_data/tca.txt")
gmt <- strsplit(a, "\t")
names(gmt) <- vapply(gmt, function(y) y[1], character(1))
gmt1 <- lapply(gmt, "[", -c(1:2))

a_temp <- gene_all$logFC
names(a_temp) <- gene_all$name
  
Ranks_all <- sort(a_temp,decreasing =TRUE)
  gseaParam = 0.5
  minSize=5
  ticksSize = 0.2
  Ranks_all = sort(a_temp,decreasing =TRUE)
  fgseaRes_all <- fgsea::fgsea(gmt1, sort(a_temp,decreasing =TRUE),minSize = minSize,nPermSimple=20000,gseaParam=0.5)
  
  pval <- fgseaRes_all %>% 
    dplyr::pull(pval) %>%
    round(3)
  nes <- fgseaRes_all %>% 
    dplyr::pull(NES) %>%
    round(3)
  
  pathway <- gmt1[[1]]
  
  stats <- Ranks_all
  rnk <- rank(-stats)
  ord <- order(rnk)
  statsAdj <- stats[ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
  statsAdj <- statsAdj/max(abs(statsAdj))
  pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
  pathway <- sort(pathway)
  gseaRes <- fgsea::calcGseaStat(statsAdj, selectedStats = pathway, 
                                 returnAllExtremes = TRUE)
  bottoms <- gseaRes$bottoms
  tops <- gseaRes$tops
  n <- length(statsAdj)
  xs <- as.vector(rbind(pathway - 1, pathway))
  ys <- as.vector(rbind(bottoms, tops))
  toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0))
  diff <- (max(tops) - min(bottoms))/8
  x = y = NULL
  ## Visualization
  g1 <- ggplot2::ggplot(toPlot, aes(x = x, y = y)) +
    ggplot2::geom_point(color = "green", size = 0.1) +  
    ggplot2::geom_hline(yintercept = 0, colour = "gray") + 
    geom_line(color = "green") +
    ggplot2::geom_line(color = "green") + 
    theme_bw() + 
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   axis.text.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank()) +
    ggplot2::labs(x = NULL, y = "enrichment score",title=)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(5,5,0,5), "mm"))
  
  g2 <- ggplot2::ggplot(toPlot, aes(x = x, y = y))+
    ggplot2::geom_segment(data = data.frame(x = pathway), 
                          mapping = aes(x = x, 
                                        y = -1, xend = x, yend = 1),
                          size = ticksSize,color="red") +
    ggplot2::scale_x_continuous(expand = c(0, 0),
                                limits = c(0,length(statsAdj)+1))+
    theme_bw()+
    ggplot2::theme(panel.grid = element_blank(),
                   axis.text.x=element_blank(),axis.ticks.x=element_blank(),
                   axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    ggplot2::labs(x=NULL,y=NULL)+
    ggplot2::theme(plot.margin = unit(c(0,5,0,5), "mm"))
  
  dat <- data.frame(name=1:length(statsAdj),value=statsAdj)
  
  g3 <- ggplot2::ggplot(dat,aes(name,value))+
    ggplot2::geom_bar(stat="identity",fill="gray")+
    ggplot2::theme_bw()+
    ggplot2::theme(panel.grid = element_blank())+
    ggplot2::labs(x=NULL)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits=c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(0,5,5,5), "mm"))
  
  p1 <- cowplot::plot_grid(plotlist = list(g1,g2,g3),nrow=3,
                           rel_heights = c(1,0.2,0.5),scale=1,
                           align ="v",labels=paste0("TCA Cycle"," pval=",pval,";NES=",nes),label_size = 8)
  ggsave("result/Figure4/S5I_GSEA_B_TCA.pdf",p1,width=4,height = 4)
  
#------------------------------------------------------------------------------
#  Step 2: Mitochondrial gene expression
#------------------------------------------------------------------------------
library(dplyr)
library(ggplot2)

## Load data and set parameters
gene_all <- data.table::fread("result/Figure4/S5H_gene_B.txt") %>%
  as.data.frame()

a <- readLines("raw_data/MITOCHONDRIAL.txt")
gmt <- strsplit(a, "\t")
names(gmt) <- vapply(gmt, function(y) y[1], character(1))
gmt1 <- lapply(gmt, "[", -c(1:2))

a_temp <- gene_all$logFC
names(a_temp) <- gene_all$name
  
  Ranks_all <- sort(a_temp,decreasing =TRUE)
  gseaParam = 0.5
  minSize=5
  ticksSize = 0.2
  Ranks_all = sort(a_temp,decreasing =TRUE)
  fgseaRes_all <- fgsea::fgsea(gmt1, sort(a_temp,decreasing =TRUE),minSize = minSize,nPermSimple=20000,gseaParam=0.5)
  
  pval <- fgseaRes_all %>% 
    dplyr::pull(pval) %>%
    round(3)
  nes <- fgseaRes_all %>% 
    dplyr::pull(NES) %>%
    round(3)
  
  pathway <- gmt1[[1]]
  
  stats <- Ranks_all
  rnk <- rank(-stats)
  ord <- order(rnk)
  statsAdj <- stats[ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
  statsAdj <- statsAdj/max(abs(statsAdj))
  pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
  pathway <- sort(pathway)
  gseaRes <- fgsea::calcGseaStat(statsAdj, selectedStats = pathway, 
                                 returnAllExtremes = TRUE)
  bottoms <- gseaRes$bottoms
  tops <- gseaRes$tops
  n <- length(statsAdj)
  xs <- as.vector(rbind(pathway - 1, pathway))
  ys <- as.vector(rbind(bottoms, tops))
  toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0))
  diff <- (max(tops) - min(bottoms))/8
  x = y = NULL
  ## Visualization
  g1 <- ggplot2::ggplot(toPlot, aes(x = x, y = y)) +
    ggplot2::geom_point(color = "green", size = 0.1) +  
    ggplot2::geom_hline(yintercept = 0, 
                        colour = "gray") + geom_line(color = "green") +
    ggplot2::geom_line(color = "green") + theme_bw() + 
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   axis.text.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank()) +
    ggplot2::labs(x = NULL, y = "enrichment score",title=)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(5,5,0,5), "mm"))
  
  g2 <- ggplot2::ggplot(toPlot, aes(x = x, y = y))+
    ggplot2::geom_segment(data = data.frame(x = pathway), 
                          mapping = aes(x = x, 
                                        y = -1, xend = x, yend = 1),
                          size = ticksSize,color="red") +
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+theme_bw()+
    ggplot2::theme(panel.grid = element_blank(),
                   axis.text.x=element_blank(),axis.ticks.x=element_blank(),
                   axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    ggplot2::labs(x=NULL,y=NULL)+
    ggplot2::theme(plot.margin = unit(c(0,5,0,5), "mm"))
  
  dat <- data.frame(name=1:length(statsAdj),value=statsAdj)
  
  g3 <- ggplot2::ggplot(dat,aes(name,value))+
    ggplot2::geom_bar(stat="identity",fill="gray")+
    ggplot2::theme_bw()+
    ggplot2::theme(panel.grid = element_blank())+
    ggplot2::labs(x=NULL)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits=c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(0,5,5,5), "mm"))
  
  p1 <- cowplot::plot_grid(plotlist = list(g1,g2,g3),nrow=3,
                           rel_heights = c(1,0.2,0.5),scale=1,
                           align ="v",labels=paste0("MITOCHONDRIAL"," pval=",pval,";NES=",nes),label_size = 8)
  ggsave("result/Figure4/S5I_GSEA_B_MITOCHONDRIAL.pdf",p1,width=4,height = 4)

#------------------------------------------------------------------------------
# Step 3: Cholesterol biosynthesis pathway
#------------------------------------------------------------------------------
library(dplyr)
library(ggplot2)

## Load data and set parameters
gene_all <- data.table::fread("result/Figure4/S5H_gene_B.txt") %>%
  as.data.frame()

a <- readLines("raw_data/Cholesterol.txt")
gmt <- strsplit(a, "\t")
names(gmt) <- vapply(gmt, function(y) y[1], character(1))
gmt1 <- lapply(gmt, "[", -c(1:2))

a_temp <- gene_all$logFC
names(a_temp) <- gene_all$name

  Ranks_all <- sort(a_temp,decreasing =TRUE)
  gseaParam = 0.5
  minSize=5
  ticksSize = 0.2
  Ranks_all = sort(a_temp,decreasing =TRUE)
  fgseaRes_all <- fgsea::fgsea(gmt1, sort(a_temp,decreasing =TRUE),minSize = minSize,nPermSimple=20000,gseaParam=0.5)
  
  pval <- fgseaRes_all %>% 
    dplyr::pull(pval) %>%
    round(3)
  nes <- fgseaRes_all %>% 
    dplyr::pull(NES) %>%
    round(3)
  
  pathway <- gmt1[[1]]
  
  stats <- Ranks_all
  rnk <- rank(-stats)
  ord <- order(rnk)
  statsAdj <- stats[ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
  statsAdj <- statsAdj/max(abs(statsAdj))
  pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
  pathway <- sort(pathway)
  gseaRes <- fgsea::calcGseaStat(statsAdj, selectedStats = pathway, 
                                 returnAllExtremes = TRUE)
  bottoms <- gseaRes$bottoms
  tops <- gseaRes$tops
  n <- length(statsAdj)
  xs <- as.vector(rbind(pathway - 1, pathway))
  ys <- as.vector(rbind(bottoms, tops))
  toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0))
  diff <- (max(tops) - min(bottoms))/8
  x = y = NULL
  ## Visualization
  g1 <- ggplot2::ggplot(toPlot, aes(x = x, y = y)) +
    ggplot2::geom_point(color = "green", size = 0.1) +  
    ggplot2::geom_hline(yintercept = 0, 
                        colour = "gray") + geom_line(color = "green") +
    ggplot2::geom_line(color = "green") + theme_bw() + 
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   axis.text.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank()) +
    ggplot2::labs(x = NULL, y = "enrichment score",title=)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(5,5,0,5), "mm"))
  
  g2 <- ggplot2::ggplot(toPlot, aes(x = x, y = y))+
    ggplot2::geom_segment(data = data.frame(x = pathway), 
                          mapping = aes(x = x, 
                                        y = -1, xend = x, yend = 1),
                          size = ticksSize,color="red") +
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+theme_bw()+
    ggplot2::theme(panel.grid = element_blank(),
                   axis.text.x=element_blank(),axis.ticks.x=element_blank(),
                   axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    ggplot2::labs(x=NULL,y=NULL)+
    ggplot2::theme(plot.margin = unit(c(0,5,0,5), "mm"))
  
  dat <- data.frame(name=1:length(statsAdj),value=statsAdj)
  
  g3 <- ggplot2::ggplot(dat,aes(name,value))+
    ggplot2::geom_bar(stat="identity",fill="gray")+
    ggplot2::theme_bw()+
    ggplot2::theme(panel.grid = element_blank())+
    ggplot2::labs(x=NULL)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits=c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(0,5,5,5), "mm"))
  
  p1 <- cowplot::plot_grid(plotlist = list(g1,g2,g3),nrow=3,
                           rel_heights = c(1,0.2,0.5),scale=1,
                           align ="v",labels=paste0("Cholesterol Biosynthesis"," pval=",pval,";NES=",nes),label_size = 8)
  ggsave("result/Figure4/S5J_GSEA_B_Cholesterol.pdf",p1,width=4,height = 4)

5.13 (S6 A) Clusters of T-ALL

The relative abundance of input metabolites and expression of metabolic genes in the two metabolic clusters: TC1 (N=10) and TC2 (N=14). Each column represents a sample and each row represents a metabolite (upper panel) or gene included (lower panel).

library(dplyr)

sample_info <- data.table::fread("result/Figure4/4F_cluster_T.txt") %>%
  as.data.frame()

metabolite_norm = data.table::fread("result/Figure4/4F_metabolite_norm_T.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("V1")

metgene_norm <- data.table::fread("result/Figure4/4F_metgene_norm_T.txt") %>%
  as.data.frame() %>%
  tibble::column_to_rownames("V1") 

top_annotation = HeatmapAnnotation(
  Subtype=sample_info$Clusters,
  col=list(Subtype=c("TC1"="#9BD53F","TC2"="#A6CEE3")),
  show_annotation_name = c(T,T), 
  border = c(T,T) ,
  simple_anno_size = unit(4, "mm"),gap = unit(1, "mm"))

col_fun = circlize::colorRamp2(c(-1, 0, 1), c("#00599F", "white", "#D01910"))

p_metabolite <- Heatmap(t(metabolite_norm),height=unit(10,"cm"),
                        name="Metabolite Data",
                        cluster_row_slices = F,clustering_method_rows = "ward.D",
                        clustering_method_columns = "ward.D2",
                        column_split = sample_info$Clusters,
                        col = col_fun,
                        top_annotation =top_annotation,
                        show_column_names = F,show_row_names = F,
                        row_names_gp = gpar(fontsize = 3),
                        column_names_gp = gpar(fontsize = 3))

col_fun = circlize::colorRamp2(c(-1, 0, 1), c("#00599F", "white", "#FF7F00"))

p_gene <- Heatmap(t(metgene_norm),height=unit(10,"cm"),name="Gene Data",
                  cluster_row_slices = F,clustering_method_rows = "ward.D",
                  clustering_method_columns = "ward.D2",
                  column_split = sample_info$Clusters,
                  col = col_fun,
                  show_column_names = F,show_row_names = F,
                  row_names_gp = gpar(fontsize = 3),
                  column_names_gp = gpar(fontsize = 3))

pdf(paste0("result/Figure4/S6A_heatmap_T.pdf"),width=8,height = 10)
p_metabolite %v% p_gene
dev.off()

5.14 (S6 E-F) RFS and EFS of TC1/2

The associations of the two metabolic clusters with clinical RFS (E) and EFS (F) outcomes in T-ALL patients. The results suggests TC1 could be of significantly poorer prognosis than TC2.

library(dplyr)
library(survival)
library(ggplot2)

#------------------------------------------------------------------------------
#  Step 1: RFS of TC1/2
#------------------------------------------------------------------------------
survival_new <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::select(PID,OS_stat,`OS (days)`,"EFS_stat","EFS (days)","RFS_stat","RFS (days)" ) %>%
  dplyr::rename("OS"="OS (days)","EFS"="EFS (days)","RFS"="RFS (days)")

clinical_survival <- data.table::fread(paste0("result/Figure4/4F_cluster_T.txt")) %>%
  as.data.frame() %>%
  dplyr::select(-OS_stat,-OS,-EFS_stat,-EFS,-RFS_stat,-RFS) %>%
  left_join(survival_new,by=c("Pid"="PID"))

surv_object <- Surv(time = clinical_survival$RFS/30, event = clinical_survival$RFS_stat)
fit_cluster <- survfit(surv_object ~ Clusters, data = clinical_survival)
p1 <- survminer::ggsurvplot(fit_cluster, data = clinical_survival, 
                            title="RFS",
                palette=c("#9BD53F","#A6CEE3","#EA644C"),
                            tables.theme = theme_void(),risk.table = TRUE,
                            risk.table.fontsize=6,
                            break.time.by = 12,
                            risk.table.y.text = FALSE, tables.height = 0.15,risk.table.col = "black",
                            pval = TRUE,pval.method = F,pval.coord=c(0.5,0.1),pval.size=6,
                            font.tickslab=c(14,"plain","black"),
                            xlab="Time (Months) ",ylab="RFS Survival")

pdf(paste0("result/Figure4/S6E_RFS_T.pdf"),onefile=FALSE)
p1
dev.off()

#------------------------------------------------------------------------------
#  Step 2: EFS
#------------------------------------------------------------------------------
clinical_survival <- data.table::fread(paste0("result/Figure4/4F_cluster_T.txt")) %>%
  as.data.frame()

surv_object <- Surv(time = clinical_survival$EFS/30, event = clinical_survival$EFS_stat)
fit_cluster <- survfit(surv_object ~ Clusters, data = clinical_survival)

p1 <- survminer::ggsurvplot(fit_cluster, data = clinical_survival, 
                            title="EFS",
                palette=c("#9BD53F","#A6CEE3","#EA644C"),
                            tables.theme = theme_void(),risk.table = TRUE,
                            risk.table.fontsize=6,
                            break.time.by = 12,
                            risk.table.y.text = FALSE, tables.height = 0.15,risk.table.col = "black",
                            pval = TRUE,pval.method = F,pval.coord=c(0.5,0.1),pval.size=6,
                            font.tickslab=c(14,"plain","black"),
                            xlab="Time (Months) ",ylab="EFS Survival")

pdf(paste0("result/Figure4/S6F_EFS_T.pdf"),onefile=FALSE)
p1
dev.off()

5.15 (S6 H) Multivariate Cox Analysis of TC1/2

The multivariate Cox analysis suggests TC1 may represent an unfavorable group independent of HSCT status. The WBC was stratified by 100×109/L, genetic risk was evaluated according to the NCCN guideline (V 2.2023) including RAS signaling, PTEN and NOTCH1/FBXW7, RAS/PTEN mutation and/or NOTCH1/FBXW7 wildtype were considered as having high risk, otherwise with low risk.

library(dplyr)
library(openxlsx)
library(survival)
library(survminer)

#------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#------------------------------------------------------------------------------
M_label <- data.table::fread("result/Figure4/4F_cluster_T.txt") %>%
  as.data.frame() %>%
  dplyr::mutate(Clusters=as.factor(Clusters)) %>%
  dplyr::select(Clusters,Pid)

ras <- data.table::fread("raw_data/RAS.txt") %>%
  as.data.frame()  %>%
  dplyr::mutate(name=stringr::str_to_upper(name))

mut <- readxl::read_excel("raw_data/Sample_RNA_190_fusion_mutation_v1130.xlsx") %>%
  dplyr::select(mutation,RNA_id,bianhao) %>%
  tidyr::separate_rows(mutation,sep=";") %>%
  dplyr::mutate(mutation=stringr::str_to_upper(mutation)) %>%
  dplyr::mutate(ras=ifelse(mutation %in% ras$name,"yes","no")) %>%
  dplyr::group_by(RNA_id,bianhao) %>%
  dplyr::summarise(Ras1=paste(ras,collapse = ";"),mutation1=paste(mutation,collapse = ";")) %>%
  dplyr::mutate(ras_new=ifelse(grepl("yes",Ras1),"yes","no"))

genetic <- read.xlsx("raw_data/Sample_RNA_190_fusion_mutation_v1130.xlsx") %>% 
  dplyr::select(bianhao,fusion,mutation) %>% 
  inner_join(mut,by="bianhao")

names(genetic)

T_gene <- genetic %>% 
  dplyr::select(mutation,bianhao,ras_new) %>% 
  dplyr::mutate(RAS_PTEN=ifelse(ras_new=="yes","MUT",
                                ifelse(grepl("PTEN",mutation),"MUT","WT"))) %>% 
  dplyr::mutate(NOTCH1_FBXW7=ifelse(grepl("NOTCH1",mutation),"MUT",
                                    ifelse(grepl("FBXW7",mutation),"MUT", "WT"))) %>% 
  dplyr::mutate(risk=ifelse(RAS_PTEN=="WT"& NOTCH1_FBXW7=="MUT","Low","High")) %>%
  dplyr::select(bianhao,risk) %>%
  dplyr::mutate(Pid=gsub("A","RJ-",bianhao))

##### new survival #####
clinical_survival <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::rename("OS"="OS (days)","EFS"="EFS (days)","RFS"="RFS (days)","ifHSCT"="HSCT_stat",
         "WBC"="WBC (10⁹/L)","Age"="Age (yrs)","Pid"="PID","RNA_id_raw"="RNA_ID_raw") %>%
  dplyr::select(Pid,RNA_id_raw,Age,Sex,WBC,ifHSCT,OS,OS_stat,EFS,EFS_stat,RFS,RFS_stat,D28_status) %>%
  dplyr::inner_join(M_label,by="Pid") %>%
  dplyr::mutate(WBC_type= ifelse(WBC > 100, "High",
                                 ifelse(WBC <= 100,"Low",WBC))) %>%
  dplyr::mutate(ifHSCT=as.factor(ifHSCT)) %>%
  dplyr::mutate(Age=ifelse(Age>40,"old","young")) %>%
  dplyr::mutate(Age=factor(Age,levels=c("young","old"))) %>%
  dplyr::mutate(Clusters=factor(Clusters)) %>%
  unique() %>%
  dplyr::left_join(T_gene,by="Pid") %>%
  dplyr::mutate(Clusters=factor(Clusters,levels=c("TC2","TC1"))) %>%
  dplyr::mutate(WBC_type=factor(WBC_type,levels=c("Low","High")))

var.names <- c("Clusters","WBC_type","D28_status","ifHSCT","risk")

#-------------------------------------------------------------------------------
#  Step 2: EFS of T-ALL
#-------------------------------------------------------------------------------
f_efs <- as.formula(paste('Surv(EFS, EFS_stat)~',paste(var.names,collapse = "+")))
fit.final <- coxph(f_efs,data=clinical_survival)
p_efs <- ggforest(fit.final,main="EFS Hzard Ratio",cpositions=c(0.02,0.22,0.4),fontsize=0.8,
                  refLabel="reference",noDigits=2,data=clinical_survival)

#-------------------------------------------------------------------------------
#  Step 3: OS of T-ALL
#-------------------------------------------------------------------------------
f_os <- as.formula(paste('Surv(OS, OS_stat)~',paste(var.names,collapse = "+")))
fit.os <- coxph(f_os,data=clinical_survival)
p_os <- ggforest(fit.os,main="OS Hzard Ratio",cpositions=c(0.02,0.22,0.4),fontsize=0.8,
                 refLabel="reference",noDigits=2,data=clinical_survival)
ggsave(p_efs,filename = "result/Figure4/S6H_HR_EFS_T.pdf",width=8,height=4)
ggsave(p_os,filename = "result/Figure4/S6H_HR_OS_T.pdf",width=8,height=4)

5.16 (S6 I) Waterfall plot of TC1/2

The associations of metabolic clusters with clinical variables and genetic alterations, including major mutations and fusions in T-ALL.

library(dplyr)
library(ComplexHeatmap)

#-------------------------------------------------------------------------------
#  Step 1: Load data and set parameters
#-------------------------------------------------------------------------------

clinical_survival <- readxl::read_excel("raw_data/Meta_Files.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::rename("OS"="OS (days)","EFS"="EFS (days)","RFS"="RFS (days)","ifHSCT"="HSCT_stat",
         "WBC"="WBC (10⁹/L)","Age"="Age (yrs)","Pid"="PID","RNA_id_raw"="RNA_ID_raw") %>%
  dplyr::select(Pid,RNA_id_raw,Age,Sex,WBC,ifHSCT,OS_stat,D28_status,Subtype) %>%
  dplyr::mutate(Age=ifelse(Age < 40,"< 40",">= 40")) %>%
  dplyr::mutate(WBC_type= ifelse(WBC > 100, "> 100",
                                 ifelse(WBC <= 100,"<= 100",NA))) %>%
  dplyr::mutate(Outcome=ifelse(OS_stat==1,"Dead",
                               ifelse(OS_stat==0,"Alive",OS_stat))) %>%
  dplyr::mutate(ifHSCT=ifelse(ifHSCT==0,"no",
                              ifelse(ifHSCT==1,"yes",ifHSCT)))

sample_info <- data.table::fread("result/Figure4/4F_cluster_T.txt") %>%
  as.data.frame() %>%
  dplyr::select(Clusters,Pid) %>%
  dplyr::mutate(bianhao=gsub("RJ-","A",Pid)) %>%
  dplyr::left_join(clinical_survival,by="Pid")

q12 <- c("#1F78B4","#E31A1C","black")
names(q12) <- unique(sample_info$Clusters)

top_annotation = HeatmapAnnotation(
  Subtype=sample_info$Clusters,
  Immunophenotype=sample_info$Subtype,
  ifHSCT=sample_info$ifHSCT,
  WBC=sample_info$WBC_type,
  Outcome=sample_info$Outcome,
  col=list(Subtype=c("TC1"="#9BD53F","TC2"="#A6CEE3"),
           Immunophenotype=c("ETP"="#F9EE35","non_ETP"="#E7317C"),
           ifHSCT=c("yes"="#CAB2D6","no"="gray"),
           WBC=c("> 100"="#FDBF6F","<= 100"="gray"),
           Outcome=c("Dead"="black","Alive"="gray")
  ),
  show_annotation_name = c(T,T), 
  border = c(T,T,T,T,T,T) ,
  simple_anno_size = unit(8, "mm"),gap = unit(1.5, "mm")
)

#-------------------------------------------------------------------------------
#  Step 2: Mutations of T-ALL
#-------------------------------------------------------------------------------
mutation_T <- data.table::fread("raw_data/mutation_T_v1114_all.txt") %>%
  as.data.frame() %>%
  dplyr::select(V2,V17,V19) %>%
  dplyr::rename("ID"="V2","Gene.refGene"="V17","ExonicFunc.refGene"="V19") %>%
  dplyr::select(ID,`Gene.refGene`,`ExonicFunc.refGene`) %>%
  dplyr::mutate(`ExonicFunc.refGene`=
                  ifelse(`ExonicFunc.refGene` %in% c("frameshift deletion","frameshift insertion","frameshift substitution"),"Frameshift",
                         ifelse(`ExonicFunc.refGene` %in% c("nonframeshift deletion","nonframeshift insertion","nonframeshift substitution"),"Nonframeshift",
                                ifelse(`ExonicFunc.refGene` %in% ".","Splice",
                                       ifelse(`ExonicFunc.refGene` %in% "nonsynonymous SNV","Missense",
                                              ifelse(`ExonicFunc.refGene`=="startloss","Startloss",
                                                     ifelse(`ExonicFunc.refGene`=="stopgain","Stopgain",
                                                            ifelse(`ExonicFunc.refGene`=="synonymous SNV","Silent",`ExonicFunc.refGene`)))))))) %>%
  unique() %>%
  dplyr::filter(ID %in% sample_info$RNA_id_raw) %>%
  dplyr::mutate(`Gene.refGene` = ifelse(`Gene.refGene` %in% c("IDH1","IDH2"),"IDH1/2",`Gene.refGene`))

mutation <- rbind(mutation_T)

hit_num <- mutation %>%
  dplyr::group_by(ID,`Gene.refGene`) %>%
  dplyr::summarise(n=n()) %>%
  dplyr::filter(n>1)

mutation <- mutation %>% 
  dplyr::left_join(hit_num,by=c("ID","Gene.refGene")) %>%
  dplyr::mutate(`ExonicFunc.refGene`=ifelse(!is.na(n),"Multiple",`ExonicFunc.refGene`)) %>%
  dplyr::select(-n) %>%
  unique() %>%
  dplyr::rename("RNA_id"="ID") %>%
  dplyr::filter(RNA_id %in% sample_info$RNA_id_raw)

mutation_gene_filter <- mutation %>%
  dplyr::select(RNA_id,Gene.refGene) %>%
  unique() %>%
  dplyr::group_by(`Gene.refGene`) %>%
  dplyr::summarise(n=n()) %>%
  dplyr::filter(n>0)
mutation_sort <- rev(names(sort(table(mutation %>% filter(`Gene.refGene` %in% mutation_gene_filter$Gene.refGene) %>% pull(Gene.refGene)))))

mutation <- mutation %>%
  dplyr::filter(`Gene.refGene` %in% mutation_gene_filter$Gene.refGene) %>%
  as.data.frame() %>%
  dplyr::distinct(RNA_id,Gene.refGene,.keep_all = T)

sample_info_filter <- sample_info %>%
  dplyr::select(bianhao,RNA_id_raw)

mutation_dcast <- reshape2::dcast(mutation,RNA_id ~ `Gene.refGene`) %>%
  dplyr::right_join(sample_info_filter,by=c("RNA_id"="RNA_id_raw")) %>%
  dplyr::select(-RNA_id) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::select(all_of(mutation_sort))

mutation_dcast[is.na(mutation_dcast)] <- 0
mutation_dcast[mutation_dcast=="Missense"] <- 1
mutation_dcast[mutation_dcast=="Frameshift"] <- 2
mutation_dcast[mutation_dcast=="Nonframeshift"] <- 3
mutation_dcast[mutation_dcast=="Silent"] <- 4
mutation_dcast[mutation_dcast=="Splice"] <- 5
mutation_dcast[mutation_dcast=="Stopgain"] <- 7
mutation_dcast[mutation_dcast=="Multiple"] <- 8

mutation_dcast1 <- mutation_dcast %>%
  dplyr::mutate_if(is.character,as.numeric) %>%
  rowwise() %>%
  dplyr::mutate(NOTCH=sum(NOTCH1)) %>%
  dplyr::mutate(PHF=sum(PTEN,PHF6)) %>%
  dplyr::mutate(RAS=sum(JAK3,JAK1,STAT5A,MTOR,NRAS,KRAS,TCF7,TCF7)) %>%
  dplyr::mutate(Transcript=sum(MED12,RUNX1,ETV6,CNOT3,MED12,GATA3,BCOR)) %>%
  dplyr::mutate(Epigenetic=sum(`IDH1/2`,DNMT3A,KMT2E,SUZ12,SETD1B,SETD2,NCOR2,
                               EP300,KDM6A)) %>%
  dplyr::mutate(Others=sum(CCND3,CTCF,BOD1L1,CCND3,SATB1,OBSCN,DNM2,CIC)) %>%
  dplyr::mutate(NOTCH=ifelse(NOTCH==0,0,9)) %>%
  dplyr::mutate(PHF=ifelse(PHF==0,0,9)) %>%
  dplyr::mutate(RAS=ifelse(RAS==0,0,9)) %>%
  dplyr::mutate(Transcript=ifelse(Transcript==0,0,9)) %>%
  dplyr::mutate(Epigenetic=ifelse(Epigenetic==0,0,9)) %>%
  dplyr::mutate(Others=ifelse(Others==0,0,9)) %>%
  dplyr::select(NOTCH1,NOTCH,
                PHF6,PTEN,PHF,
                JAK3,JAK1,MTOR,NRAS,KRAS,TCF7,TCF7,STAT5A,RAS,
                ETV6,RUNX1,MED12,CNOT3,MED12,GATA3,BCOR,Transcript,
                DNMT3A,SUZ12,SETD1B,`IDH1/2`,KMT2E,NCOR2,EP300,KDM6A,
                Epigenetic,
                CTCF,CCND3,CCND3,SATB1,OBSCN,DNM2,CIC,BOD1L1,Others) %>%
  as.data.frame()


rownames(mutation_dcast1) <- rownames(mutation_dcast)
mutation_dcast <- mutation_dcast1

mutation_dcast_temp <- mutation_dcast
mutation_dcast_temp[mutation_dcast_temp!=0] <- 1
num=colSums(mutation_dcast_temp)
names(mutation_dcast) <- paste0(names(mutation_dcast)," (n=",num,")")

d_mutation <- t(mutation_dcast) %>%
  as.data.frame() %>%
  dplyr::select(sample_info$bianhao)

mutation_annotation <- Heatmap(d_mutation,height=unit(15,"cm"),rect_gp = gpar(col = "gray", lwd = 2),
                               col=c("#EEEEEE","#3987CC","#DB3D3D","#663FFB","#FF7F0E","#CC66FF","#8B564C","#339933","blue"),
                               cluster_columns = F,cluster_rows = F,border = TRUE,column_title = NULL,
                               column_split =sample_info$Clusters,
                               show_column_names = TRUE,
                               top_annotation=top_annotation,
                               row_names_gp = gpar(fontsize = 8),column_names_gp=gpar(fontsize=6))

#-------------------------------------------------------------------------------
#  Step 3: Fusions of T-ALL
#-------------------------------------------------------------------------------
fusion_T <- readxl::read_excel("raw_data/fusion_T_v0803.xlsx",skip=1) %>%
  as.data.frame() %>%
  dplyr::filter(RNA_id %in% sample_info$RNA_id_raw) %>%
  dplyr::select(RNA_id,final_check) %>%
  dplyr::left_join(sample_info,by=c("RNA_id"="RNA_id_raw")) %>%
  dplyr::select(RNA_id,final_check) %>%
  dplyr::filter(!is.na(final_check))

fusion <- rbind(fusion_T)

clinical_fusion_dcast <- reshape2::dcast(fusion,RNA_id ~ final_check) %>%
  dplyr::right_join(sample_info_filter,by=c("RNA_id"="RNA_id_raw")) %>%
  tibble::column_to_rownames("bianhao") %>%
  dplyr::select(-RNA_id)

clinical_fusion_dcast[is.na(clinical_fusion_dcast)] <- 0
clinical_fusion_dcast[clinical_fusion_dcast != 0] <- 1

d_fusion <- t(clinical_fusion_dcast) %>%
  as.data.frame() %>%
  dplyr::select(sample_info$bianhao) %>%
  tibble::rownames_to_column(var="fusion") %>%
  dplyr::arrange(match(fusion,c("SET-NUP214","MLLT10 rearrangement","NUP98","STIL-TAL1",
                                "KMT2A rearrangement","other"))) %>%
  tibble::column_to_rownames("fusion")

fusion_annotation <- Heatmap(d_fusion,rect_gp = gpar(col = "gray", lwd = 2),
                             col=c("#EEEEEE","#444444"),
                             height=unit(2,"cm"),border = TRUE,
                             column_split =sample_info$Clusters,
                             cluster_columns = F,cluster_rows = F,column_title = NULL,
                             row_names_gp = gpar(fontsize = 8),column_names_gp=gpar(fontsize=6))

#-------------------------------------------------------------------------------
#  Step 4: Visualization
#-------------------------------------------------------------------------------
pdf(paste0("result/Figure4/S6I_oncoplot_T.pdf"),width=7,height = 10)
mutation_annotation %v% fusion_annotation
dev.off()

5.17 (S6 J) DA score of TC1/2

DA-score demonstrates that most of the metabolic pathways are upregulated in the relatively favorable TC2 cluster.

library(ggplot2)
library(MNet)
library(dplyr)

#------------------------------------------------------------------------------
#  Step 1: DA-score of gene_T
#------------------------------------------------------------------------------
cluster <- data.table::fread("result/Figure4/4F_cluster_T.txt") %>%
  as.data.frame() 
dat <- data.table::fread("raw_data/cell_dat_final_reuslt_v0329.txt") %>%
  dplyr::select(label,all_of(cluster$METc_BM_D0_ID)) %>%
  tibble::column_to_rownames("label")
group <- cluster$Clusters
table(group)
group[which(group== "TC1")] <- "tumor"
group[which(group== "TC2")] <- "normal"
metabolite_all <- mlimma(log1p(dat),group)
write.table(metabolite_all,"result/Figure4/S6J_metabolite_T.txt",quote=F,row.names=F,sep="\t")

dd <- metabolite_all %>%
  dplyr::mutate(Condition=ifelse(logFC>0 & P.Value < 0.05,"Up",
                          ifelse(logFC < 0 & P.Value < 0.05,"Down","normal")))

cluster <- data.table::fread("result/Figure4/4F_cluster_T.txt") %>%
  as.data.frame()

dat <- readRDS("raw_data/20231113_RNA_ALL_VST_coding.rds") %>%
  as.data.frame() %>%
  dplyr::select(all_of(cluster$RNA_id_raw))
group <- cluster$Clusters

group[which(group== "TC1")] <- "tumor"
group[which(group== "TC2")] <- "normal"

gene_all <- mlimma(dat,group)
 
write.table(gene_all,"result/Figure4/S6J_gene_T.txt",quote=F,row.names=F,sep="\t")

dat <- data.table::fread("result/Figure4/S6J_gene_T.txt")

dat_increase <- dat %>%
  dplyr::filter(logFC > 0) %>%
  dplyr::filter(P.Value < 0.05)
dat_decrease <- dat %>%
  dplyr::filter(logFC < 0) %>%
  dplyr::filter(P.Value < 0.05)

DA_result <- DAscore(dat_increase$name,dat_decrease$name,dat$name,min_measured_num = 10,out="gene")
DAscore_gene_T <- DA_result$result

#------------------------------------------------------------------------------
#  Step 2: DA-score of metabolite_T
#------------------------------------------------------------------------------
kegg_id <- readxl::read_excel("raw_data/cell_metabolite_info_all_v1109.xlsx") %>%
  as.data.frame() %>%
  dplyr::select(refmet_name,KEGG) %>%
  tidyr::separate_rows(KEGG,sep=";") %>%
  dplyr::filter(KEGG!="NA") %>%
  dplyr::filter(KEGG != "None") %>%
  as.data.frame()

dat <- data.table::fread("result/Figure4/S6J_metabolite_T.txt") %>%
  dplyr::arrange(desc(logFC)) %>%
  dplyr::inner_join(kegg_id,by=c("name"="refmet_name")) %>%
  dplyr::distinct(KEGG,.keep_all = TRUE)

dat_increase <- dat %>%
  dplyr::filter(logFC > 0) %>%
  dplyr::filter(P.Value < 0.05)
dat_decrease <- dat %>%
  dplyr::filter(logFC < 0) %>%
  dplyr::filter(P.Value < 0.05)

DA_result <- DAscore(dat_increase$KEGG,dat_decrease$KEGG,dat$KEGG,min_measured_num = 3,out="metabolite")
write.table(DA_result$result,"result/Figure4/S6J_DAscore_metabolite_T.txt",quote=F,row.names=F,sep="\t")

#------------------------------------------------------------------------------
#  Step 3: DA-score of gene+metabolite_T
#------------------------------------------------------------------------------
gene <- DAscore_gene_T %>%
  as.data.frame() %>%
  dplyr::filter(Measured_members_num >= 10) %>%
  dplyr::filter(abs(DA_score) > 0.05)

metabolite <- data.table::fread("result/Figure4/S6J_DAscore_metabolite_T.txt") %>%
  as.data.frame() %>%
  dplyr::filter(Measured_members_num >= 3) %>%
  dplyr::filter(DA_score != 0)

all_1 <- gene %>%
  dplyr::inner_join(metabolite,by="Pathway") %>%
  dplyr::filter(DA_score.x >0 & DA_score.y >0)
  
all_2 <- gene %>%
  dplyr::inner_join(metabolite,by="Pathway") %>%
  dplyr::filter(DA_score.x < 0 & DA_score.y < 0)

path <- c(all_1$Pathway,all_2$Pathway)

colp <- c("Amino acid metabolism" ="#1B9E77",
          "Carbohydrate metabolism"="#D95F02",
          "Glycan biosynthesis and metabolism"="#1F78B4", 
          "Metabolism of cofactors and vitamins"="#7570B3",
          "Metabolism of terpenoids and polyketides"="#BC80BD",
          "Metabolism of other amino acids"="#8DD3C7",
          "Energy metabolism"="#E7298A",
          "Lipid metabolism"="#66A61E",
          "Nucleotide metabolism"="#E6AB02",
          "Biosynthesis of other secondary metabolites"="#A6761D",
          "Xenobiotics biodegradation and metabolism"="#666666")

metabolite_filter <- metabolite %>%
  dplyr::filter(Pathway %in% path) %>%
  dplyr::filter(Measured_members_num >= 3) %>%
  dplyr::filter(abs(DA_score) > 0.05) %>%
  dplyr::arrange(`Pathway Category`) %>%
  dplyr::mutate(Pathway=factor(Pathway,levels = Pathway))

colp <- c("Amino acid metabolism" ="#1B9E77",
          "Carbohydrate metabolism"="#D95F02",
          "Glycan biosynthesis and metabolism"="#1F78B4", 
          "Metabolism of cofactors and vitamins"="#7570B3",
          "Metabolism of terpenoids and polyketides"="#BC80BD",
          "Metabolism of other amino acids"="#8DD3C7",
          "Energy metabolism"="#E7298A",
          "Lipid metabolism"="#66A61E",
          "Nucleotide metabolism"="#E6AB02",
          "Biosynthesis of other secondary metabolites"="#A6761D",
          "Xenobiotics biodegradation and metabolism"="#666666")

p <- ggplot(metabolite_filter)+
  geom_point(aes(x=Pathway,y=DA_score,size=log2(Measured_members_num),color=`Pathway Category`))+
  geom_pointrange(aes(x=Pathway,y=DA_score,ymin=0,ymax=DA_score,color=`Pathway Category`))+
  scale_color_manual(values=colp)+
  coord_flip()+
  xlab(NULL)+
  theme_bw()

ggsave("result/Figure4/S6J_DAscore_metabolite_T_filter.pdf",p,width=8,height = 3.5)

5.18 (S6 K-L) GSEA between TC1 and TC2

The GSEA reveals that the TC1 cluster, associated with poorer prognosis, exhibits suppression in OXPHOS and enrichment in histone modifications.

#------------------------------------------------------------------------------
#  Step 1: Oxidative phosphorylation
#------------------------------------------------------------------------------
library(dplyr)
library(ggplot2)

## Load data
gene_all <- data.table::fread("result/Figure4/S6J_gene_T.txt") %>%
  as.data.frame()

a <- readLines("raw_data/OXIDATIVE.txt")
gmt <- strsplit(a, "\t")
names(gmt) <- vapply(gmt, function(y) y[1], character(1))
gmt1 <- lapply(gmt, "[", -c(1:2))

a_temp <- gene_all$logFC
names(a_temp) <- gene_all$name
  ##GSEA
  Ranks_all <- sort(a_temp,decreasing =TRUE)
  gseaParam = 1
  minSize=5
  ticksSize = 0.2
  Ranks_all = sort(a_temp,decreasing =TRUE)
  fgseaRes_all <- fgsea::fgsea(gmt1, sort(a_temp,decreasing =TRUE),minSize = minSize,nPermSimple=20000,gseaParam=1)
  
  pval <- fgseaRes_all %>% 
    dplyr::pull(pval) %>%
    round(3)
  nes <- fgseaRes_all %>% 
    dplyr::pull(NES) %>%
    round(3)
  
  pathway <- gmt1[[1]]
  
  stats <- Ranks_all
  rnk <- rank(-stats)
  ord <- order(rnk)
  statsAdj <- stats[ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
  statsAdj <- statsAdj/max(abs(statsAdj))
  pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
  pathway <- sort(pathway)
  gseaRes <- fgsea::calcGseaStat(statsAdj, selectedStats = pathway, 
                                 returnAllExtremes = TRUE)
  bottoms <- gseaRes$bottoms
  tops <- gseaRes$tops
  n <- length(statsAdj)
  xs <- as.vector(rbind(pathway - 1, pathway))
  ys <- as.vector(rbind(bottoms, tops))
  toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0))
  diff <- (max(tops) - min(bottoms))/8
  x = y = NULL
  g1 <- ggplot2::ggplot(toPlot, aes(x = x, y = y)) +
    ggplot2::geom_point(color = "green", size = 0.1) +  
    ggplot2::geom_hline(yintercept = 0, 
                        colour = "gray") + geom_line(color = "green") +
    ggplot2::geom_line(color = "green") + theme_bw() + 
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   axis.text.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank()) +
    ggplot2::labs(x = NULL, y = "enrichment score",title=)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(5,5,0,5), "mm"))
  
  g2 <- ggplot2::ggplot(toPlot, aes(x = x, y = y))+
    ggplot2::geom_segment(data = data.frame(x = pathway), 
                          mapping = aes(x = x, 
                                        y = -1, xend = x, yend = 1),
                          size = ticksSize,color="red") +
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+theme_bw()+
    ggplot2::theme(panel.grid = element_blank(),
                   axis.text.x=element_blank(),axis.ticks.x=element_blank(),
                   axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    ggplot2::labs(x=NULL,y=NULL)+
    ggplot2::theme(plot.margin = unit(c(0,5,0,5), "mm"))
  
  dat <- data.frame(name=1:length(statsAdj),value=statsAdj)
  
  g3 <- ggplot2::ggplot(dat,aes(name,value))+
    ggplot2::geom_bar(stat="identity",fill="gray")+
    ggplot2::theme_bw()+
    ggplot2::theme(panel.grid = element_blank())+
    ggplot2::labs(x=NULL)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits=c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(0,5,5,5), "mm"))
  
  p1 <- cowplot::plot_grid(plotlist = list(g1,g2,g3),nrow=3,
                           rel_heights = c(1,0.2,0.5),scale=1,
                           align ="v",labels=paste0("OXIDATIVE_PHOSPHORYLATION"," pval=",pval,";NES=",nes),label_size = 8)
  ggsave("result/Figure4/S6K_GSEA_T_OXIDATIVE.pdf",p1,width=5,height = 5)

#------------------------------------------------------------------------------
#  Step 2: Histone modifications
#------------------------------------------------------------------------------
## Load data
gene_all <- data.table::fread("result/Figure4/S6J_gene_T.txt") %>%
  as.data.frame()

a <- readLines("raw_data/histone.txt")
gmt <- strsplit(a, "\t")
names(gmt) <- vapply(gmt, function(y) y[1], character(1))
gmt1 <- lapply(gmt, "[", -c(1:2))

a_temp <- gene_all$logFC
names(a_temp) <- gene_all$name

##GSEA
  Ranks_all <- sort(a_temp,decreasing =TRUE)
  gseaParam = 1
  minSize=5
  ticksSize = 0.2
  Ranks_all = sort(a_temp,decreasing =TRUE)
  fgseaRes_all <- fgsea::fgsea(gmt1, sort(a_temp,decreasing =TRUE),minSize = minSize,nPermSimple=20000,gseaParam=0.5)
  
  pval <- fgseaRes_all %>% 
    dplyr::pull(pval) %>%
    round(3)
  nes <- fgseaRes_all %>% 
    dplyr::pull(NES) %>%
    round(3)
  
  pathway <- gmt1[[1]]
  
  stats <- Ranks_all
  rnk <- rank(-stats)
  ord <- order(rnk)
  statsAdj <- stats[ord]
  statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
  statsAdj <- statsAdj/max(abs(statsAdj))
  pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
  pathway <- sort(pathway)
  gseaRes <- fgsea::calcGseaStat(statsAdj, selectedStats = pathway, 
                                 returnAllExtremes = TRUE)
  bottoms <- gseaRes$bottoms
  tops <- gseaRes$tops
  n <- length(statsAdj)
  xs <- as.vector(rbind(pathway - 1, pathway))
  ys <- as.vector(rbind(bottoms, tops))
  toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0))
  diff <- (max(tops) - min(bottoms))/8
  x = y = NULL
  g1 <- ggplot2::ggplot(toPlot, aes(x = x, y = y)) +
    ggplot2::geom_point(color = "green", size = 0.1) +  
    ggplot2::geom_hline(yintercept = 0, 
                        colour = "gray") + geom_line(color = "green") +
    ggplot2::geom_line(color = "green") + theme_bw() + 
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   axis.text.x=ggplot2::element_blank(),axis.ticks.x=ggplot2::element_blank()) +
    ggplot2::labs(x = NULL, y = "enrichment score",title=)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(5,5,0,5), "mm"))
  
  g2 <- ggplot2::ggplot(toPlot, aes(x = x, y = y))+
    ggplot2::geom_segment(data = data.frame(x = pathway), 
                          mapping = aes(x = x, 
                                        y = -1, xend = x, yend = 1),
                          size = ticksSize,color="red") +
    ggplot2::scale_x_continuous(expand = c(0, 0),limits = c(0,length(statsAdj)+1))+theme_bw()+
    ggplot2::theme(panel.grid = element_blank(),
                   axis.text.x=element_blank(),axis.ticks.x=element_blank(),
                   axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    ggplot2::labs(x=NULL,y=NULL)+
    ggplot2::theme(plot.margin = unit(c(0,5,0,5), "mm"))
  
  dat <- data.frame(name=1:length(statsAdj),value=statsAdj)
  
  g3 <- ggplot2::ggplot(dat,aes(name,value))+
    ggplot2::geom_bar(stat="identity",fill="gray")+
    ggplot2::theme_bw()+
    ggplot2::theme(panel.grid = element_blank())+
    ggplot2::labs(x=NULL)+
    ggplot2::scale_x_continuous(expand = c(0, 0),limits=c(0,length(statsAdj)+1))+
    ggplot2::theme(plot.margin = unit(c(0,5,5,5), "mm"))
  
  p1 <- cowplot::plot_grid(plotlist = list(g1,g2,g3),nrow=3,
                           rel_heights = c(1,0.2,0.5),scale=1,
                           align ="v",labels=paste0("histone"," pval=",pval,";NES=",nes),label_size = 8)
  ggsave("result/Figure4/S6L_GSEA_T_histone.pdf",p1,width=4,height = 4)