1 Figure 1

Fig. 1: Study design overview and tumor-specific pathway hallmarks in PDAC (a) Workflow depicting the cohort-scale integration of clinical and proteomic data. (b) PCA plot displaying analyzable proteins in treated-naïve tumor and their paired tumor adjacent control (TAC) samples. Each dot represents a sample, with blue dots for TAC samples and red dots for tumor samples. The % value indicates the explained variance. (c) Volcano plot displaying differentially abundant proteins between tumors and TACs. Each dot represents a protein, with red dots for proteins significantly up-regulated in tumors, and blue dots for proteins significantly down-regulated in tumors. P-values were calculated using the R package limma and adjusted using the Benjamini & Hochberg (BH) method. The significance threshold was defined as up-regulated in tumors (adjusted P-value < 0.05, log2 fold change [log2FC] > 0.58 [FC>1.5]), down-regulated in tumors (adjusted P-value < 0.05, log2FC < -0.58), or otherwise considered not significant. (d) Dot plot illustrating enriched pathways identified based on significantly up/down-regulated proteins. Red dots represent pathways enriched (adjusted P-value < 0.05) for proteins up-regulated in tumors compared to TACs, while blue dots represent pathways enriched (adjusted P-value < 0.05) for proteins down-regulated in tumors.

1.1 (b) PCA

Principal-component analyses (PCAs) involving 4,787 analyzable proteins revealed a clear separation between tumors and TACs.

library(scatterplot3d)
library(tidyverse)
library(openxlsx)
library(factoextra) # Extract and Visualize the Results of Multivariate Data Analyses
color.bin <- c("#00599F","#D01910")
dir.create("./results/Figure1",recursive = T)
#----------------------------------------------------------------------------------
#  Step 1: Load Proteomics data
#----------------------------------------------------------------------------------
pro  <- readRDS("./data/proteomics/20230412_PDAC_PRO_exp.rds") # 4787 protein * 281 samples
meta <- readRDS("./data/proteomics/20230412_PDAC_PRO_meta.rds")
identical(rownames(meta),colnames(pro)) # check names
#----------------------------------------------------------------------------------
#  Step 2: PCA
#----------------------------------------------------------------------------------
res.pca.comp <- prcomp(pro, scale = F)
#----------------------------------------------------------------------------------
#  Step 3: Plot
#----------------------------------------------------------------------------------
plot.data <- as.data.frame(res.pca.comp$rotation[, 1:10])
plot.data <- plot.data %>% 
              mutate(ID=rownames(plot.data),
                     Type=meta$Type,
                     TypeColor=color.bin[as.numeric(as.factor(Type))])
pdf("./results/Figure1/1B.PRO_PCA3d.pdf", width = 7, height = 7)
scatterplot3d(x = plot.data$PC2, 
              y = plot.data$PC1, 
              z = plot.data$PC3,
              color = plot.data$TypeColor,
              pch = 16, cex.symbols = 1,
              scale.y = 0.7, angle = 45,
              xlab = "PC2", ylab = "PC1", zlab = "PC3",
              main="3D Scatter Plot of proteomics",
              col.axis = "#444444", col.grid = "#CCCCCC")
legend("bottom", legend = levels(as.factor(meta$Type)),
      col =  color.bin,  pch = 16,
      inset = -0.15, xpd = TRUE, horiz = TRUE)
dev.off()
write.xlsx(plot.data, "./results/Figure1/1B.PRO_PCA3d.xlsx", overwrite = T)

# variance percent of each PC
p <- fviz_eig(res.pca.comp)
var_explained <- get_eig(res.pca.comp)
# var_explained <- res.pca.comp$sdev^2 / sum(res.pca.comp$sdev^2)
ggsave("./results/Figure1/1B.PRO_PCA_percent.pdf",p,width = 5, height = 5)
write.xlsx(var_explained, "./results/Figure1/1B.PRO_PCA_percant.xlsx",rowNames=T, overwrite = T)

1.2 (c) Volcano plot

Volcano plot displaying differentially expressed proteins between tumors and TACs, with 1,213 proteins significantly up-regulated and 864 proteins down-regulated in tumors as compared to TACs.

library(limma)
library(tidyverse)
library(openxlsx)
library(ggpubr)
library(ggthemes)
#----------------------------------------------------------------------------------
#  Step 1: Load Proteomics data
#----------------------------------------------------------------------------------
exp  <- readRDS("./data/proteomics/20230412_PDAC_PRO_exp.rds") # 4787 protein * 281 samples
meta <- readRDS("./data/proteomics/20230412_PDAC_PRO_meta.rds")
identical(rownames(meta),colnames(exp)) # check names
#----------------------------------------------------------------------------------
#  Step 2: limma
#----------------------------------------------------------------------------------
meta      <- meta %>% mutate(contrast=as.factor(Type)) 
design    <- model.matrix(~ 0 + contrast , data = meta) # un-paired
fit       <- lmFit(exp, design)
contrast  <- makeContrasts( Tumor_Normal = contrastT - contrastN , levels = design)
fits      <- contrasts.fit(fit, contrast)
ebFit     <- eBayes(fits)
limma.res <- topTable(ebFit, coef = "Tumor_Normal", adjust.method = 'fdr', number = Inf)
## result
limma.res <- limma.res %>% filter(!is.na(adj.P.Val)) %>% 
              mutate( logP = -log10(adj.P.Val) ) %>%
              mutate( tag = "Tumor -vs- Normal")%>%
              mutate( Gene = ID)
# cutoff:  FC:1.5   adj.p:0.05
limma.res <- limma.res %>% mutate(group=case_when(
                                  (adj.P.Val < 0.05 & logFC > 0.58) ~ "up",
                                  (adj.P.Val < 0.05 & logFC < -0.58) ~ "down",
                                  .default = "not sig"))
table(limma.res$group) # UP:1213 ; DOWN:864 ; not:2710
## output
write.xlsx( limma.res, "./results/Figure1/1C.PRO_Limma_fc1.5.xlsx", overwrite = T, rowNames = F) 
#----------------------------------------------------------------------------------
#  Step 3: Vasualization 
#----------------------------------------------------------------------------------
## volcano
limma.res <- limma.res %>% mutate(group=factor(group,levels = c("up","down","not sig")))
my_label <- paste0( "FC:1.5 ; AdjP:0.05 ; ",
                    "Up:",table(limma.res$group)[1]," ; ",
                    "Down:",table(limma.res$group)[2])
p <- ggscatter(limma.res,
               x = "logFC", y = "logP",
               color = "group", size = 2,
               main = paste0("Tumor -vs- TAC"), # ***
               xlab = "log2FoldChange", ylab = "-log10(adjusted P.value)",
               palette = c("#D01910","#00599F","#CCCCCC"),
               ylim = c(-1, 70),xlim=c(-8,8))+
  theme_base()+
  geom_hline(yintercept = -log10(0.05), linetype="dashed", color = "#222222") +
  geom_vline(xintercept = 0.58 , linetype="dashed", color = "#222222") +
  geom_vline(xintercept = -0.58, linetype="dashed", color = "#222222") +
  labs(subtitle = my_label)+
  theme(plot.background = element_blank())
ggsave("./results/Figure1/1C.PRO_Limma_fc1.5.pdf", p, width = 10, height = 10)

1.3 (d) Enrichment plot

Dot plot illustrating enriched pathways identified based on significantly up/down-regulated proteins. Red dots represent pathways enriched (adjusted P-value < 0.05) for proteins up-regulated in tumors compared to TACs, while blue dots represent pathways enriched (adjusted P-value < 0.05) for proteins down-regulated in tumors.

# data : enriched pathways table
plot.data <- read.xlsx("./data/Extended Data Table 3.xlsx", sheet = 2, startRow = 2)
plot.pathway <- c("GO:0006730~one-carbon metabolic process","GO:0006888~ER to Golgi vesicle-mediated transport","hsa00020:Citrate cycle (TCA cycle)","hsa00071:Fatty acid degradation","hsa00190:Oxidative phosphorylation","hsa00250:Alanine, aspartate and glutamate metabolism","hsa00260:Glycine, serine and threonine metabolism","hsa00280:Valine, leucine and isoleucine degradation","hsa00480:Glutathione metabolism","hsa00520:Amino sugar and nucleotide sugar metabolism","hsa00620:Pyruvate metabolism","hsa00630:Glyoxylate and dicarboxylate metabolism","hsa00640:Propanoate metabolism","hsa01100:Metabolic pathways","hsa01200:Carbon metabolism","hsa01212:Fatty acid metabolism","hsa01240:Biosynthesis of cofactors","hsa03010:Ribosome","hsa03060:Protein export","hsa04141:Protein processing in endoplasmic reticulum","hsa04972:Pancreatic secretion","GO:0001916~positive regulation of T cell mediated cytotoxicity","GO:0006096~glycolytic process","GO:0007165~signal transduction","GO:0007266~Rho protein signal transduction","GO:0045087~innate immune response","GO:0050778~positive regulation of immune response","GO:0050853~B cell receptor signaling pathway","GO:0050870~positive regulation of T cell activation","GO:0071346~cellular response to interferon-gamma","hsa04015:Rap1 signaling pathway","hsa04062:Chemokine signaling pathway","hsa04066:HIF-1 signaling pathway","hsa04151:PI3K-Akt signaling pathway","hsa04512:ECM-receptor interaction","hsa04610:Complement and coagulation cascades","hsa04621:NOD-like receptor signaling pathway","hsa04666:Fc gamma R-mediated phagocytosis")
plot.data <- plot.data %>% filter(Term %in% plot.pathway) %>% 
                    mutate(LogFDR= -log10(FDR))
# plot
color.bin <- c("#00599F","#D01910")
p <- ggscatter(plot.data, x = "LogFDR", y = "Fold.Enrichment", color = "Type",
               main = "Enrichment of tumor/TAC protein",
               size = "Ratio", shape = 16,
               label = plot.data$Term, palette = color.bin) + theme_base()
p <- p + scale_size(range = c(4,20)) + 
         scale_x_continuous(limit = c(-10, 40)) + 
         theme(plot.background = element_blank())
ggsave(paste0("./results/Figure1/1D.PRO_deg.tn_enrich.pdf"), p, width = 11, height = 10)