In this demo, the PIC2 is applied to the latest critical Covid-19 GWAS summary-level data; see PMID:35255492, generating the target index PIC2Target
and the drug index PIC2Drug
.
# Please install R (version 4.2.1 or above)
# see https://cran.r-project.org
# if the package 'BiocManager' not installed, please do so
if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# first, install basic packages: remotes, tidyverse, pbapply, ComplexHeatmap
BiocManager::install(c('remotes','tidyverse','pbapply','ComplexHeatmap'), dependencies=T)
# then, install the package 'PIC2' (now hosted at github)
BiocManager::install("hfang-bristol/PIC2", dependencies=T, force=T)
# check the package 'PIC2' successfully installed
library(PIC2)
# load packages
library(PIC2)
library(tidyverse)
library(patchwork)
# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_pic2"
# create the output folder 'pic2_showcase'
outdir <- 'pic2_showcase'
if(!dir.exists(outdir)) dir.create(outdir)
# read GWAS summary data as input
COVID19_GWAS.35255492 <- oRDS("COVID19_GWAS.35255492", placeholder=placeholder)
input_data <- COVID19_GWAS.35255492 %>% GenomicRanges::mcols() %>% as_tibble() %>% filter(p<5e-8) %>% transmute(snp, pvalue=p)
# SNPs in linkage disequilibrium according to the European population
LD.EUR <- oRDS('GWAS_LD.EUR', placeholder=placeholder)
# All common SNPs (primarily sourced from dbSNP)
#getOption("timeout"): 60
options(timeout=120) # please increase the timeout if failed to download
GR.SNP <- oRDS("dbSNP_Common", placeholder=placeholder)
# All known genes (primarily sourced from UCSC)
GR.Gene <- oRDS("UCSC_knownGene", placeholder=placeholder)
df_GR_gene <- GR.Gene %>% as_tibble()
# Functional interaction network (primarily sourced from STRING)
# restricted to high-quality interactions ("experiments" and "databases")
STRING_fin <- oRDS("STRING_fin", placeholder=placeholder)
# Random walk with restart
# restarting probability
restart <- 0.5
# include.RGB (53)
include.RGB <- c("PCHiC_PMID27863249_Activated_total_CD4_T_cells","PCHiC_PMID27863249_Endothelial_precursors","PCHiC_PMID27863249_Erythroblasts","PCHiC_PMID27863249_Fetal_thymus","PCHiC_PMID27863249_Macrophages_M0","PCHiC_PMID27863249_Macrophages_M1","PCHiC_PMID27863249_Macrophages_M2","PCHiC_PMID27863249_Megakaryocytes","PCHiC_PMID27863249_Monocytes","PCHiC_PMID27863249_Naive_B_cells","PCHiC_PMID27863249_Naive_CD4_T_cells","PCHiC_PMID27863249_Naive_CD8_T_cells","PCHiC_PMID27863249_Neutrophils","PCHiC_PMID27863249_Nonactivated_total_CD4_T_cells","PCHiC_PMID27863249_Total_B_cells","PCHiC_PMID27863249_Total_CD4_T_cells","PCHiC_PMID27863249_Total_CD8_T_cells", "PCHiC_PMID31501517_AdrenalGland","PCHiC_PMID31501517_Aorta","PCHiC_PMID31501517_Bladder","PCHiC_PMID31501517_Cardiomyocytes","PCHiC_PMID31501517_DorsolateralPrefrontalCortex","PCHiC_PMID31501517_Esophagus","PCHiC_PMID31501517_Fat","PCHiC_PMID31501517_Fibroblast","PCHiC_PMID31501517_Gastric","PCHiC_PMID31501517_H1","PCHiC_PMID31501517_H1MesenchymalStemCell","PCHiC_PMID31501517_H1MesendodermStemCell","PCHiC_PMID31501517_H1NeuralProgenitorCell","PCHiC_PMID31501517_Hippocampus","PCHiC_PMID31501517_LCL","PCHiC_PMID31501517_LeftHeartVentricle","PCHiC_PMID31501517_Liver","PCHiC_PMID31501517_Lung","PCHiC_PMID31501517_Ovary","PCHiC_PMID31501517_Pancreas","PCHiC_PMID31501517_Psoas","PCHiC_PMID31501517_RightHeartVentricle","PCHiC_PMID31501517_RightHeatAtrium","PCHiC_PMID31501517_SigmoidColon","PCHiC_PMID31501517_SmallBowel","PCHiC_PMID31501517_Spleen","PCHiC_PMID31501517_Thymus","PCHiC_PMID31501517_Trophoblast", "PCHiC_PMID31367015_astrocytes","PCHiC_PMID31367015_excitatory","PCHiC_PMID31367015_hippocampal","PCHiC_PMID31367015_motor", "PCHiC_PMID31253982_islet","PCHiC_PMID29955040_CMhESC", "PCHiC_PMID25938943_CD34","PCHiC_PMID25938943_GM12878")
# include.QTL (114)
include.QTL <- c("pQTL_Plasma_PMID34857953", "eQTL_eQTLGen")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNg","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNgSalmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_Salmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_naive","eQTL_eQTLCatalogue_BLUEPRINT_Tcell","eQTL_eQTLCatalogue_BLUEPRINT_monocyte","eQTL_eQTLCatalogue_BLUEPRINT_neutrophil","eQTL_eQTLCatalogue_BrainSeq_brain","eQTL_eQTLCatalogue_CEDAR_microarray_Bcell_CD19","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD4","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD8","eQTL_eQTLCatalogue_CEDAR_microarray_ileum","eQTL_eQTLCatalogue_CEDAR_microarray_monocyte_CD14","eQTL_eQTLCatalogue_CEDAR_microarray_neutrophil_CD15","eQTL_eQTLCatalogue_CEDAR_microarray_platelet","eQTL_eQTLCatalogue_CEDAR_microarray_rectum","eQTL_eQTLCatalogue_CEDAR_microarray_transverse_colon","eQTL_eQTLCatalogue_FUSION_adipose_naive","eQTL_eQTLCatalogue_FUSION_muscle_naive","eQTL_eQTLCatalogue_Fairfax_2012_microarray_Bcell_CD19","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_IFN24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS2","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_naive","eQTL_eQTLCatalogue_GENCORD_LCL","eQTL_eQTLCatalogue_GENCORD_Tcell","eQTL_eQTLCatalogue_GENCORD_fibroblast","eQTL_eQTLCatalogue_GEUVADIS_LCL")
include.QTL <- c(include.QTL,"eQTL_eQTLCatalogue_GTEx_V8_Adipose_Subcutaneous","eQTL_eQTLCatalogue_GTEx_V8_Adipose_Visceral_Omentum","eQTL_eQTLCatalogue_GTEx_V8_Adrenal_Gland","eQTL_eQTLCatalogue_GTEx_V8_Artery_Aorta","eQTL_eQTLCatalogue_GTEx_V8_Artery_Coronary","eQTL_eQTLCatalogue_GTEx_V8_Artery_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Brain_Amygdala","eQTL_eQTLCatalogue_GTEx_V8_Brain_Anterior_cingulate_cortex_BA24","eQTL_eQTLCatalogue_GTEx_V8_Brain_Caudate_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellar_Hemisphere","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellum","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Brain_Frontal_Cortex_BA9","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hippocampus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hypothalamus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Nucleus_accumbens_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Putamen_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Spinal_cord_cervical_c1","eQTL_eQTLCatalogue_GTEx_V8_Brain_Substantia_nigra","eQTL_eQTLCatalogue_GTEx_V8_Breast_Mammary_Tissue","eQTL_eQTLCatalogue_GTEx_V8_Cells_Cultured_fibroblasts","eQTL_eQTLCatalogue_GTEx_V8_Cells_EBVtransformed_lymphocytes","eQTL_eQTLCatalogue_GTEx_V8_Colon_Sigmoid","eQTL_eQTLCatalogue_GTEx_V8_Colon_Transverse","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Gastroesophageal_Junction","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Mucosa","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Muscularis","eQTL_eQTLCatalogue_GTEx_V8_Heart_Atrial_Appendage","eQTL_eQTLCatalogue_GTEx_V8_Heart_Left_Ventricle","eQTL_eQTLCatalogue_GTEx_V8_Kidney_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Liver","eQTL_eQTLCatalogue_GTEx_V8_Lung","eQTL_eQTLCatalogue_GTEx_V8_Minor_Salivary_Gland","eQTL_eQTLCatalogue_GTEx_V8_Muscle_Skeletal","eQTL_eQTLCatalogue_GTEx_V8_Nerve_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Ovary","eQTL_eQTLCatalogue_GTEx_V8_Pancreas","eQTL_eQTLCatalogue_GTEx_V8_Pituitary","eQTL_eQTLCatalogue_GTEx_V8_Prostate","eQTL_eQTLCatalogue_GTEx_V8_Skin_Not_Sun_Exposed_Suprapubic","eQTL_eQTLCatalogue_GTEx_V8_Skin_Sun_Exposed_Lower_leg","eQTL_eQTLCatalogue_GTEx_V8_Small_Intestine_Terminal_Ileum","eQTL_eQTLCatalogue_GTEx_V8_Spleen","eQTL_eQTLCatalogue_GTEx_V8_Stomach","eQTL_eQTLCatalogue_GTEx_V8_Testis","eQTL_eQTLCatalogue_GTEx_V8_Thyroid","eQTL_eQTLCatalogue_GTEx_V8_Uterus","eQTL_eQTLCatalogue_GTEx_V8_Vagina","eQTL_eQTLCatalogue_GTEx_V8_Whole_Blood")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_HipSci_iPSC","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD4","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD8","eQTL_eQTLCatalogue_Lepik_2017_blood","eQTL_eQTLCatalogue_Naranbhai_2015_microarray_neutrophil_CD16","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Listeria","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Salmonella","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_naive","eQTL_eQTLCatalogue_Quach_2016_monocyte_IAV","eQTL_eQTLCatalogue_Quach_2016_monocyte_LPS","eQTL_eQTLCatalogue_Quach_2016_monocyte_Pam3CSK4","eQTL_eQTLCatalogue_Quach_2016_monocyte_R848","eQTL_eQTLCatalogue_Quach_2016_monocyte_naive","eQTL_eQTLCatalogue_ROSMAP_brain_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Bcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_NKcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Tfh_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th117_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th17_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th1_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th2_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_CD16_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_naive","eQTL_eQTLCatalogue_Schwartzentruber_2018_sensory_neuron","eQTL_eQTLCatalogue_TwinsUK_LCL","eQTL_eQTLCatalogue_TwinsUK_blood","eQTL_eQTLCatalogue_TwinsUK_fat","eQTL_eQTLCatalogue_TwinsUK_skin","eQTL_eQTLCatalogue_van_de_Bunt_2015_pancreatic_islet")
# prepare predictors (it takes 9 min or so)
ls_pNode_genomic <- oPierSNPsAdv(data=input_data, LD.customised=LD.EUR, GR.SNP=GR.SNP, GR.Gene=GR.Gene, include.QTL=include.QTL, include.RGB=include.RGB, network.customised=STRING_fin, restart=restart, placeholder=placeholder)
# save into a file 'ls_pNode_genomic.RData' under the folder 'pic2_showcase'
save(list='ls_pNode_genomic', file=file.path(outdir,'ls_pNode_genomic.RData'))
# do prioritisation
ls_pNode <- Filter(Negate(is.null), ls_pNode_genomic)
dTarget <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy="logistic", GR.Gene=GR.Gene, placeholder=placeholder)
# write into a file 'PIC2_priority.txt.gz' under the folder 'pic2_showcase'
dTarget$priority %>% select(name,rating,rank,seed,nGene,eGene,cGene,description) %>% write_delim(file.path(outdir,'PIC2_priority.txt.gz'), delim='\t')
# save into a file 'dTarget_PIC2.RData' under the folder 'pic2_showcase'
save(list='dTarget', file=file.path(outdir,'dTarget_PIC2.RData'))
Target genes stored in the output file PIC2_priority.txt.gz
above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating
).
# Extract the top two prioritised genes per chromosome
top.label.query <- dTarget$priority %>% inner_join(GR.Gene %>% as.data.frame() %>% as_tibble(), by=c("name"="Symbol")) %>% group_by(seqnames) %>% top_n(2,rating) %>% pull(name)
# draw manhattan plot
gp_manhattan_chr <- oPierManhattan(dTarget, top.label.query=top.label.query, y.lab="Priority rating for genes (ie TARGET INDEX)", GR.Gene=GR.Gene, placeholder=placeholder)
# saved into a file 'PIC2_manhattan.pdf' under the folder 'pic2_showcase'
file.path(outdir,"PIC2_manhattan_chr.pdf") %>% ggsave(gp_manhattan_chr, width=10, height=4)
Figure 4.1: Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top two prioritised genes named per chromosome (x-axis).
# GSP (phase 3 or above drug targets for COVID-19)
# GSN (simulated negative targets)
ig.EF.ChEMBL <- oRDS('ig.EF.ChEMBL', placeholder=placeholder)
ind <- match('COVID-19', V(ig.EF.ChEMBL)$name)
GSP <- V(ig.EF.ChEMBL)$GSP3[[ind]]
GSN <- V(ig.EF.ChEMBL)$GSN[[ind]]
vec_methods <- c("fishers","logistic","max","sum")
names(vec_methods) <- vec_methods
ls_dT_methods <- pbapply::pblapply(vec_methods, function(x){
if(x %in% c("max","sum")){
dT <- oPierMatrix(ls_pNode, displayBy="score", aggregateBy=x, GR.Gene=GR.Gene, placeholder=placeholder, verbose=F)
}else if(x %in% c("fishers","logistic")){
dT <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy=x, GR.Gene=GR.Gene, placeholder=placeholder, verbose=F)
}
})
ls_pPerf <- lapply(ls_dT_methods, function(x){
prediction <- x$priority %>% select(name,rating)
pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})
# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_methods <- gp + coord_fixed()
ChEMBL_full <- oRDS("ChEMBL_full", placeholder=placeholder)
# naive prediction: by the frequency of approved drugs targeting a gene
data_naive <- ChEMBL_full %>% filter(phase>=4) %>% select(chembl_id_drug,Symbol) %>% distinct() %>% count(Symbol) %>% arrange(-n) %>% as.data.frame()
# pNode_naive
pNode_naive <- oPierGenes(data_naive, network.customised=STRING_fin, restart=1, GR.Gene=GR.Gene, placeholder=placeholder)
# do evaluation
ls_dT_selected <- c(ls_dT_methods, list(`Naive`=pNode_naive))
ls_pPerf <- lapply(ls_dT_selected, function(x){
if (is(x,"pNode")){
prediction <- x$priority %>% select(name,priority)
}else if(is(x,"dTarget")){
prediction <- x$priority %>% select(name,rating)
}
pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})
# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_auc <- gp + coord_fixed()
# saved into a file 'PIC2_auc.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_auc.pdf') %>% ggsave(gp_auc, width=6, height=4)
# only for PIC2
pPerf_ROC <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="ROC", highlight=T)
pPerf_PR <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="PR", highlight=T)
gp_both <- list(pPerf_ROC$gp+coord_fixed(), pPerf_PR$gp+coord_fixed()) %>% wrap_plots(ncol=2, guides='collect')
# saved into a file 'PIC2_benchmark_highlight.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_benchmark_highlight.pdf') %>% ggsave(gp_both, width=6, height=4)
Figure 5.1: AUC plot. FN, false negatives; FP, false positives; TN, true negatives; TP: true positives. The performance is compared with different rating schemes: ‘logistic’ using a logistic meta-analysis method to combine dataset-specific predictors, ‘fishers’ using a Fisher’s combined method, two conventional ones [‘sum’ (additively summing up across predictors) and ‘max’ (taking the maximum across predictors)], and ‘Naive’ for naive rating (the frequency of being therapeutically targeted by existing licensed/approved drugs, which is motivated by drug repurposing).
# KEGG: Coronavirus disease - COVID-19
# https://www.genome.jp/pathway/hsa05171
ig.KEGG.individual <- oRDS('ig.KEGG.individual', placeholder=placeholder) %>% deframe()
g <- ig.KEGG.individual$'Coronavirus disease - COVID-19'
# restricted to genes prioritised
nodes_query <- g %>% oIG2TB('nodes') %>% semi_join(dTarget$priority, by='name') %>% pull(name)
ig <- oNetInduce(g, nodes_query=nodes_query, knn=0, remove.loops=TRUE, largest.comp=F, min.comp.size=15)
V(ig)$priority <- ig %>% oIG2TB('nodes') %>% inner_join(dTarget$priority, by='name') %>% pull(rating)
V(ig)$rank <- ig %>% oIG2TB('nodes') %>% inner_join(dTarget$priority, by='name') %>% pull(rank)
V(ig)$degree <- igraph::degree(ig)
ig_covid <- ig %>% oLayout("graphlayouts.layout_with_stress")
# gp_covid
gp_covid <- ig_covid %>% oGGnetwork(node.label='name', node.label.size=2, node.label.color='black', node.label.alpha=0.95, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.01, node.xcoord="xcoord", node.ycoord="ycoord", node.color="priority", node.color.title='rating', colormap="spectral", zlim=c(0,5), node.color.alpha=0.95, node.size='degree', node.size.title='degree', node.size.range=c(1.5,5), edge.color="steelblue4", edge.color.alpha=0.3,edge.size=0.3,edge.curve=0,edge.arrow.gap=0.01)
# saved into a file 'PIC2_kegg.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_kegg.pdf') %>% ggsave(gp_covid, width=9, height=4)
# write into a file 'PIC2_kegg.txt' under the folder 'pic2_showcase'
ig_covid %>% oIG2TB('nodes') %>% transmute(Symbol, rating=priority, rank, comp, degree, description) %>% write_delim(file.path(outdir,'PIC2_kegg.txt'), delim='\t')
# Enrichment analysis of the top 1% prioritised genes in terms of known Covid-19 host pathway genes
set <- list(known=V(g)$name)
data <- dTarget$priority %>% top_frac(0.01,rating) %>% pull(name)
background <- dTarget$priority %>% pull(name)
eset <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
Figure 6.1: An expert-curated network of host gene interactions for Covid-19 (sourced from the KEGG database). The interaction network was visualised, with gene nodes color-coded by priority rating and sized by node degree (i.e. the number of neighbors).
Target genes stored in the output file PIC2_kegg.txt
above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating
).
Enrichment analysis of the top 1% prioritised genes in terms of known Covid-19 host pathway genes
set <- list(known=V(g)$name)
data <- dTarget$priority %>% top_frac(0.01,rating) %>% pull(name)
background <- dTarget$priority %>% pull(name)
eset_path <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
BioGRID_HCoV <- oRDS("BioGRID_HCoV", placeholder=placeholder)
# SARS-CoV2 (2697049)
BioGRID_HCoV %>% filter(from_tax==2697049) %>% count(Source) %>% arrange(-n) %>% filter(str_detect(Source,'PUBMED:'), n>=140)
df_subset <- BioGRID_HCoV %>% filter(from_tax==2697049) %>% filter(Source %in% c('PUBMED:34709727','PUBMED:33845483','PUBMED:33766124','PUBMED:33060197','PUBMED:32353859','PUBMED:32838362','PUBMED:33080218'))
df_subset_n <- df_subset %>% count(from,to) %>% arrange(-n)
df_pair_selected <- df_subset %>% left_join(df_subset_n, by=c('from','to'))
# Enrichment analysis in terms of known Covid-19 interacting host genes (independently identified in two or more studies)
set <- df_pair_selected %>% filter(n>=2) %>% select(from,to) %>% nest(data=-from) %>% mutate(gene=map(data,~pull(.x,to))) %>% select(-data) %>% deframe()
set$all <- unlist(set) %>% unique()
data <- dTarget$priority %>% top_frac(0.01,rating) %>% pull(name)
background <- dTarget$priority %>% pull(name)
eset_hcov <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=0, p.tail=c("one-tail","two-tails")[2])
eset_hcov %>% oSEAextract() %>% as.data.frame() %>% select(-c(id,namespace,distance)) %>% DT::datatable()
# Extract COVID-19 phase-III drug targets (sourced from ChEMBL)
ChEMBL_full <- oRDS("ChEMBL_full", placeholder=placeholder)
df_known <- ChEMBL_full %>% filter(efo_term=="COVID-19") %>% filter(phase>=3)
# Do leading prioritisation analysis
customised.genesets <- list(known=df_known %>% pull(Symbol) %>% unique())
df <- dTarget$priority %>% column_to_rownames("name") %>% top_frac(0.1,rating) %>% transmute(priority=rating, rank=rank)
set.seed(825)
eLPA <- oPierGSEA(df, customised.genesets=customised.genesets, nperm=20000)
# Extract known drug targets recovered at the leading prioritisation
df_recovered <- eLPA$df_summary %>% pull(member) %>% unlist() %>% enframe(name="Symbol", value="rank") %>% inner_join(df_known %>% distinct(Symbol,pref_name_drug), by="Symbol")
# Save into the file "PIC2_recovered.txt" under the output directory
df_recovered %>% write_delim(file.path(outdir,"PIC2_recovered.txt"), delim="\t")
Target genes stored in the output file PIC2_recovered.txt
above can be explored below. Notes, genes are ranked by priority rank (see the column rank
).
# Leading plot
gp_leading <- oGSEAdotplot(eLPA, compact=T, colormap="spectral", zlim=c(0,5), title="none", subtitle="none")
# Leading plot (core)
leading.query <- dTarget$priority %>% pull(name)
gp_leading_core <- oGSEAdotplot(eLPA, leading=T, leading.edge.only=T, leading.query.only=T, leading.query=leading.query, colormap="spectral", zlim=c(0,5))
# Save into the file "PIC2_leading.pdf" under the output directory
gp_both <- list(gp_leading_core,gp_leading) %>% wrap_plots(ncol=2, widths=c(2,1), guides="collect")
file.path(outdir,"PIC2_leading.pdf") %>% ggsave(gp_both, width=10, height=4)
Figure 7.1: Leading prioritisation plot for Covid-19 phase-III therapeutic targets. Leading prioritisation analysis was performed to quantify the degree to which Covid-19 phase-III therapeutic targets are enriched at the leading prioritisation. The leading prioritisation is defined as the core subset of prioritised target genes accounting for the enrichment signal, which can be visualised as the left-most region of the peak in running enrichment plot (middle panel). Covid-19 phase-III therapeutic targets recovered at the leading prioritisation (zoomed in left panel) are named and indicated in vertical lines (also color-coded by priority rating).
# per target, keep the maximum phased drugs only (along with disease indications)
ChEMBL_slim <- oRDS("ChEMBL_slim", placeholder=placeholder)
# dtt: approved drug targets but excluding 'Covid-19'
dtt <- ChEMBL_slim %>% filter(efo_term!='COVID-19',phase==4) %>% mutate(target_number=1) %>% mutate(pref_name_drug=detail) %>% as.data.frame()
# vec_recovered_repurpose
vec_recovered_repurpose <- dtt %>% semi_join(df_recovered %>% distinct(Symbol), by='Symbol') %>% pull(Symbol) %>% unique()
# DR
DR <- oRepurpose(vec_recovered_repurpose, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=50, size=4, legend.title="Approved", x.text.size=6, y.text.size=6)
# write into a file 'PIC2_leading_recovered_repurpose.txt' under the folder 'pic2_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'PIC2_leading_recovered_repurpose.txt'), delim='\t')
# saved into a file 'PIC2_leading_recovered_repurpose.pdf' under the folder 'pic2_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right",limits=rev)+ scale_x_discrete(position="top",limits=rev) + coord_flip()
file.path(outdir,'PIC2_leading_recovered_repurpose.pdf') %>% ggsave(gp_DR, width=4.5, height=9)
# saved into a file 'PIC2_leading_recovered_repurpose_index.pdf' under the folder 'pic2_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_left <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=0,x=0.01)), colhead=list(fg_params=list(hjust=0, x=0.01)))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
if(1){
## one-column table
gp_index <- DR$index %>% mutate(Drug=str_replace_all(Drug,', ','\n')) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
#grid::grid.draw(gp_index)
ggsave(file.path(outdir,'PIC2_leading_recovered_repurpose_index.pdf'), gp_index, width=3.5, height=9)
}else{
## two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
#df_index <- df_index %>% mutate(Drug=str_replace_all(Drug,'], .*','] +'))
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% dplyr::slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'PIC2_leading_recovered_repurpose_index2.pdf'), gp_table, width=7, height=6)
}
# HEB
df_xyw <- dtt %>% as_tibble() %>% distinct(Symbol, efo_term, pref_name_drug) %>% count(Symbol, efo_term) %>% transmute(xnode=Symbol, ynode=efo_term, weight=n) %>% inner_join(tibble(xnode=vec_recovered_repurpose), by='xnode')
big <- df_xyw %>% pivot_wider(names_from=xnode, values_from=weight, values_fill=list(weight=0)) %>% arrange(ynode) %>% column_to_rownames("ynode") %>% oBicreate()
V(big)$community <- V(big)$type
E(big)$weight <- 1
gp_heb <- oHEB(big, edge.alpha=0.8, edge.width=0.5, edge.palette='grey90', leave.label.expansion=1.01)
## saved into a file 'PIC2_leading_recovered_repurpose_heb.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_recovered_repurpose_heb.pdf') %>% ggsave(gp_heb, width=10, height=8)
# Enrichment analysis of Covid-19 phase-III drug targets recovered on leading prioritisation analysis in terms of all approved drug targets
set <- list(approved=dtt$Symbol %>% unique())
data <- df_recovered %>% pull(Symbol) %>% unique()
background <- dTarget$priority %>% pull(name)
eset_heb <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
Figure 7.2: Dot plot showing Covid-19 phase-III genes (x-axis) that are currently targeted by approved drugs in other diseases (y-axis). Dots are referenced in integers (see the next).
Figure 7.3: The information on referenced drugs and mechanisms of action. See integers above.
Drug-target-disease information stored in the output file PIC2_leading_recovered_repurpose.txt
above can be explored below.
Figure 7.4: Hierarchical edge bundling visualises genes (in pink) that are already targeted (with edges) by approved drugs in disease indications (in cyan).
eset_heb %>% oSEAextract() %>% select(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
# define the peak
peak <- eLPA$df_summary %>% filter(setID=='known') %>% pull(peak)
# df_target_leading
df_target_leading <- dTarget$priority %>% top_n(peak,rating) %>% inner_join(df_GR_gene %>% transmute(name=Symbol,chrom=seqnames), by='name') %>% left_join(df_recovered %>% transmute(name=Symbol,drug=pref_name_drug), by='name') %>% mutate(known=ifelse(is.na(drug),'no','yes')) %>% select(name,rank,rating,description,seed,nGene,eGene,cGene,chrom,known,drug)
# write into a file 'PIC2_leading.txt' under the folder 'pic2_showcase'
df_target_leading %>% write_delim(file.path(outdir,'PIC2_leading.txt'), delim='\t')
Leading target genes stored in the output file PIC2_leading.txt
above can be explored below.
# background
background <- dTarget$priority$name
# list_vec
list_vec <- list(leading=dTarget$priority %>% top_n(peak,rating) %>% pull(name))
# sets
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(list_vec, sets, size.range=c(10,1000), min.overlap=10, background=background)
# df_eTerm
df_eTerm <- esad %>% oSEAextract() %>% filter(adjp<0.05,CIl>1) %>% filter(distance==3) %>% transmute(group,onto,namespace,name,pvalue,adjp,zscore,or,CIl,CIu,distance,nA,nO,overlap)
# df_eTerm_select
df_eTerm_select <- df_eTerm %>% filter(namespace %in% c("Environmental Process","Organismal System"))
# dot plot
gp_dot <- df_eTerm_select %>% mutate(onto=namespace) %>% group_by(onto) %>% oSEAdotplot(label.top=5) + facet_wrap(onto~.)
## save into a file 'PIC2_leading_kegg_dot.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_kegg_dot.pdf') %>% ggsave(gp_dot, width=6, height=4)
# write into a file 'PIC2_leading_kegg.txt' under the folder 'pic2_showcase'
df_eTerm_select %>% arrange(group,pvalue) %>% write_delim(file.path(outdir,'PIC2_leading_kegg.txt'), delim='\t')
Figure 8.1: KEGG enrichment analysis for leading prioritised genes.
Enriched pathways stored in the output file PIC2_leading_kegg.txt
above can be explored below.
# background
background <- dTarget$priority$name
# list_vec
list_vec <- list(leading=dTarget$priority %>% top_n(peak,rating) %>% pull(name))
# sets
sets <- tibble(onto='xCell') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(list_vec, sets, size.range=c(10,1000), min.overlap=10, background=background)
# df_eTerm
df_eTerm <- esad %>% oSEAextract() %>% arrange(onto,adjp)
# oSEAggraph
ig <- oRDS("ig.xCell", placeholder=placeholder)
if(1){
xCell_signature2 <- oRDS("xCell_signature2", placeholder=placeholder)
xCell_signature2 %>% count(namespace)
nodes_query <- xCell_signature2 %>% filter(namespace!='HSC') %>% pull(name)
ig <- oDAGinduce(ig, nodes_query=nodes_query)
}
gp_ggraph <- df_eTerm %>% filter(adjp<10e-2) %>% oSEAggraph(ig, fixed=T, node.color="adjp", colormap="grey90-orange-darkred", zlim=c(0,10), node.size="zscore",slim=c(0,15),node.size.range=c(0.5,5),node.label.size=2.5, node.label.direction=c('none','leftright','topbottom')[2], node.label.color="black", node.label.alpha=1, layout=c('stress','dendrogram')[2], leave=F, edge.color='black', edge.alpha=0.2, edge.width=0.5)
## save into a file 'PIC2_leading_xcell_ggraph.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_xcell_ggraph.pdf') %>% ggsave(gp_ggraph, width=9, height=4.5)
# bigraph
data <- df_eTerm %>% inner_join(gp_ggraph$data %>% filter(leaf) %>% select(name,label), by='name') %>% filter(adjp<5e-2) %>% top_n(100, -pvalue) %>% transmute(name=label, members=overlap)
## big
big <- data %>% separate_rows(members, sep=", ") %>% mutate(flag=1) %>% select(members,name,flag) %>% pivot_wider(names_from=members, values_from=flag, values_fill=list(flag=0)) %>% column_to_rownames("name") %>% oBicreate() %>% oBigraph()
## gp_big
big <- big %>% oLayout("graphlayouts.layout_with_stress")
igraph::degree(big) -> V(big)$degree
ifelse(V(big)$type=='xnode', V(big)$name, "") -> V(big)$label
ifelse(V(big)$type=='xnode' | V(big)$degree>=1 , V(big)$name, "") -> V(big)$label
gp_big <- big %>% oGGnetwork(node.label="label", node.label.size=2, node.label.color='black', node.label.alpha=1, node.label.force=0.05, node.xcoord="xcoord", node.ycoord="ycoord", colormap='steelblue-steelblue', node.shape="type", node.size="degree", node.size.range=c(2,5), edge.color="grey80", edge.color.alpha=0.3, edge.arrow.gap=0, edge.curve=0.05)
## saved into a file 'PIC2_leading_xcell_big.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_xcell_big.pdf') %>% ggsave(gp_big, width=9, height=4)
Figure 8.2: Circular illustration of cell type enrichments, with nodes sized by Z-score and colored by FDR.
Figure 8.3: A bigraph linking enriched cell types (in circle) and their member genes (in triangle).
# background
background <- dTarget$priority$name
# list_vec
list_vec <- list(leading=dTarget$priority %>% top_n(peak,rating) %>% pull(name))
# sets
sets <- tibble(onto=c('PSG','PSGearly','PSGrecent')) %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(list_vec, sets, size.range=c(0,50000), min.overlap=0, background=background)
# df_eTerm
df_eTerm <- esad %>% oSEAextract() %>% filter(onto=='PSG') %>% arrange(onto,distance)
# forest plot
gp_forest <- df_eTerm %>% oSEAforest(top=100, adjp.cutoff=1.01, zlim=c(0,10), colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="none")
# save into a file 'PIC2_leading_psg_forest.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_psg_forest.pdf') %>% ggsave(gp_forest, width=7, height=3.5)
# write into a file 'PIC2_leading_psg_forest.txt' under the folder 'pic2_showcase'
df_eTerm %>% select(name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(name,adjp) %>% write_delim(file.path(outdir,'PIC2_leading_psg_forest.txt'), delim='\t')
Figure 8.4: Forest plot of phylostrata enrichments. A phylostratum contains a group of genes that were first created at this phylostratum (defined as gene evolutionary age), dated through genomic phylostratigraphy (PSG). Plylostrata are ordered according to evolutionary history, from the earliest (PSG01) to the most recent (PSG16). Also listed are genes first created at each enriched phylostratum.
Enriched phylostrata stored in the output file PIC2_leading_psg_forest.txt
above can be explored below.
KEGG enrichment analysis for enriched PSG
# ls_psg
ls_psg <- df_eTerm %>% filter(adjp<1e-2) %>% separate_rows(overlap,sep=', ') %>% distinct(name,overlap) %>% filter(overlap!='') %>% nest(data=-name) %>% mutate(gene=map(data,~pull(.x,overlap))) %>% select(-data) %>% deframe()
# enrichment analysis
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
background <- ls_psg %>% unlist() %>% unique()
esad <- oSEAadv(ls_psg, sets, background=NULL, size.range=c(10,500), min.overlap=10)
# upset for PSG08
levels.customised <- ls_psg$`PSG08 (Euteleostomi)`
gp_upset <- esad %>% oSEAextract() %>% filter(namespace %in% c('Environmental Process'), group %in% c('PSG08 (Euteleostomi)')) %>% oSEAupset(top=3, adjp.cutoff=0.01, color="orange", slim=c(0,40), shape=18, size.range=c(1,5), member.levels="customised", levels.customised=levels.customised)
gp_upset_psg08 <- gp_upset + ggtitle('PSG08 (Euteleostomi)')
# upset for PSG09
levels.customised <- ls_psg$`PSG09 (Amniota)`
gp_upset <- esad %>% oSEAextract() %>% filter(namespace %in% c('Environmental Process'), group %in% c('PSG09 (Amniota)')) %>% oSEAupset(top=3, adjp.cutoff=0.01, color="orange", slim=c(0,40), shape=18, size.range=c(1,5), member.levels="customised", levels.customised=levels.customised)
gp_upset_psg09 <- gp_upset + ggtitle('PSG09 (Amniota)')
# save into a file 'PIC2_leading_psg_upset.pdf' under the folder 'pic2_showcase'
gp_both <- gp_upset_psg08 + gp_upset_psg09 + plot_layout(width=c(1,1), guides='collect')
file.path(outdir,'PIC2_leading_psg_upset.pdf') %>% ggsave(gp_both, width=7, height=8)
Figure 8.5: Kite-like plot for KEGG pathways enriched in PSG08 (Euteleostomi) and PSG09 (Amniota). Enrichment significance (FDR) is estimated based on one-sided Fisher’s exact test. Each kite is sized by the number of member genes (indicated by blue dots beneath).
esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% c('Environmental Process'), group %in% c('PSG08 (Euteleostomi)','PSG09 (Amniota)')) %>% arrange(group,adjp) %>% transmute(group,name,adjp,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
# background
background <- dTarget$priority$name
# list_vec
list_vec <- list(leading=dTarget$priority %>% top_n(peak,rating) %>% pull(name))
# sets
sets <- tibble(onto=c('GOMF','GOBP','GOCC','MDODD','KEGG','HPPA')) %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(list_vec, sets, size.range=c(10,1000), min.overlap=10, background=background)
# df_eTerm
df_eTerm_out <- esad %>% oSEAextract() %>% filter(adjp<0.05,CIl>1) %>% transmute(group,onto,namespace,name,pvalue,adjp,zscore,or,CIl,CIu,distance,nA,nO,overlap)
df_eTerm_select <- df_eTerm_out %>% filter(namespace %in% c("Environmental Process","Organismal System","Cellular Process","Human Disease","Genetic Process","Metabolism","Biological Process","Cellular Component","Molecular Function","Disease Or Disorder","Phenotypic Abnormality")[c(1,7,9)])
# dot plot
gp_dot <- df_eTerm_select %>% mutate(namespace=fct_inorder(namespace)) %>% mutate(onto=namespace) %>% oSEAdotplot(label.top=10, label.direction.y="none") + facet_wrap(onto~.)
## save into a file 'PIC2_leading_go_dot.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_leading_go_dot.pdf') %>% ggsave(gp_dot, width=10, height=4)
Figure 8.6: GO enrichment analysis for leading prioritised genes.
From the list of all prioritised target genes, a concise and manageable list of genes are identified, likely mediating the crosstalk between molecular pathways (sourced from KEGG), that is, targets at the pathway crosstalk level.
org.Hs.egKEGGEnvironmentalOrganismal <- oRDS('org.Hs.egKEGGEnvironmentalOrganismal', placeholder=placeholder)
org.Hs.egKEGGEnvironmentalProcess <- oRDS('org.Hs.egKEGGEnvironmentalProcess', placeholder=placeholder)
ig.KEGG.category <- oRDS('ig.KEGG.category', placeholder=placeholder) %>% deframe()
ig <- ig.KEGG.category$'Environmental Process'
ig2 <- oNetInduce(ig, nodes_query=V(ig)$name, largest.comp=T) %>% as.undirected()
subg <- oPierSubnet(dTarget, network.customised=ig2, subnet.size=50)
# do permutation test
#subg_perm <- oPierSubnet(dTarget, network.customised=ig2, subnet.size=50, test.permutation=T, num.permutation=100, respect="degree", aggregateBy="fishers")
#subg_perm$combinedP: 1.408528e-112
#dnet::dPvalAggregate(pmatrix=matrix(E(subg_perm)$edgeConfidence, nrow = 1), method='fishers')
# save into a file 'subg_PIC2.RData' under the folder 'pic2_showcase'
save(list='subg', file=file.path(outdir,'subg_PIC2.RData'))
# write into a file 'PIC2_priority_subg.txt' under the folder 'pipe_showcase'
dTarget$priority %>% select(name,rating,rank,seed,nGene,eGene,cGene,description) %>% semi_join(tibble(name=V(subg)$name), by='name') %>% write_delim(file.path(outdir,'PIC2_priority_subg.txt'), delim='\t')
Pathway crosstalk genes stored in the output file PIC2_priority_subg.txt
above can be explored below. Notes, genes are ranked by priority rating (0-5; see the column rating
).
ig <- subg
# degree
V(ig)$degree <- igraph::degree(ig)
# layout: xcoord + ycoord
ig <- ig %>% oLayout("graphlayouts.layout_with_stress")
# label
V(ig)$label <- tibble(name=V(ig)$name) %>% inner_join(dTarget$priority, by='name') %>% mutate(rank=sprintf("%03d",rank)) %>% mutate(label=str_c(name,'®', rank)) %>% pull(label)
# gp_subg
gp_subg <- ig %>% oGGnetwork(node.label='label', node.label.size=2.5, node.label.color='black', node.label.alpha=1, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.05, node.xcoord="xcoord", node.ycoord="ycoord", node.color="priority", node.color.title='rating', colormap="spectral", zlim=c(0,5), node.color.alpha=0.95, node.size='degree', node.size.title='degree', node.size.range=c(1.5,5), edge.color="steelblue4", edge.color.alpha=0.5, edge.size=0.3, edge.curve=0.05, edge.arrow.gap=0.01)
#gp_subg + guides(color="none", size="none")
# saved into a file 'PIC2_subg_noevidence.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_noevidence.pdf') %>% ggsave(gp_subg, width=8, height=6)
Figure 9.1: Visualisation of the pathway crosstalk, with nodes labelled by gene symbols (®ranks).
# KEGG pathway enrichment analysis
list_vec <- list(subg=V(subg)$name)
background <- dTarget$priority$name
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
esad <- oSEAadv(list_vec, sets, size.range=c(10,1000), min.overlap=10, background=background)
# UpSet plot
## levels.customised
levels.customised <- dTarget$priority %>% inner_join(tibble(name=V(subg)$name), by='name') %>% mutate(rank=sprintf("%03d",rank)) %>% mutate(label=str_c(name,'®', rank)) %>% pull(label)
## df_output
df_output <- esad %>% oSEAextract() %>% separate_rows(overlap, sep=', ') %>% left_join(dTarget$priority %>% select(name,rank), by=c('overlap'='name')) %>% mutate(rank=sprintf("%03d",rank)) %>% mutate(overlap=str_c(overlap,'®',rank)) %>% select(-rank) %>% nest(data=overlap) %>% mutate(overlap=map(data,~str_c(.x %>% pull(overlap),collapse=', '))) %>% select(-data)
## gp_upset
gp_upset <- df_output %>% filter(namespace %in% c("Environmental Process")) %>% oSEAupset(top=6, adjp.cutoff=0.01, color="cyan4", slim=c(0,40), shape=18, size.range=c(1,5), member.levels="customised", levels.customised=levels.customised)
## saved into a file 'PIC2_subg_kegg_EP.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_kegg_EP.pdf') %>% ggsave(gp_upset, width=4, height=6)
df_output %>% filter(namespace %in% c("Environmental Process")) %>% transmute(name,adjp,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
Figure 9.2: Kite-like plot for KEGG pathways enriched in crosstalk genes. Enrichment significance (FDR) is estimated based on one-sided Fisher’s exact test. Each kite is sized by the number of member genes (indicated by blue dots beneath).
Pathway crosstalk decomposition
data <- esad %>% oSEAextract() %>% filter(namespace %in% c('Environmental Process')) %>% filter(adjp<0.01) %>% top_n(100, -pvalue) %>% transmute(name=name, members=overlap)
# identify the minimum spanning tree (mst)
tig <- data %>% oTIG(method='jaccard', empirical.cutoff=0.5)
gp_mst <- oGGnetwork(tig, node.label='name', node.label.size=2.5, node.label.color='steelblue4', node.label.alpha=1, node.label.force=1, node.xcoord='xcoord', node.ycoord='ycoord', colormap="cyan4-cyan4", edge.size='weight', node.shape=18, node.size='n_members', node.size.title="Number of\nmember genes", slim=c(0,40), node.size.range=c(1,5), edge.color='cyan4', edge.color.alpha=0.3, edge.curve=0, edge.arrow.gap=0.01)
# saved into a file 'PIC2_subg_kegg_mst.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_kegg_mst.pdf') %>% ggsave(gp_mst, width=4, height=3)
# write into a file 'PIC2_subg_kegg_mst_edges.txt' under the folder 'pic2_showcase'
tig %>% oIG2TB('edges') %>% transmute(from, to, n_shared, shared, weight) %>% write_delim(file.path(outdir,'PIC2_subg_kegg_mst_edges.txt'), delim='\t')
# write into a file 'PIC2_subg_kegg_mst_nodes.txt' under the folder 'pic2_showcase'
tig %>% oIG2TB('nodes') %>% transmute(name, n_members, members) %>% write_delim(file.path(outdir,'PIC2_subg_kegg_mst_nodes.txt'), delim='\t')
Figure 9.3: Pathway-centric representation of the crosstalk, with nodes for pathways (sized by the number of member genes) and edges for connections (the thickness proportional to the number of genes shared between two-endpoint pathways).
Target genes stored in the output file PIC2_subg_kegg_mst_edges.txt
above can be explored below.
Target genes stored in the output file PIC2_subg_kegg_mst_nodes.txt
above can be explored below.
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_left <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=0,x=0.01)), colhead=list(fg_params=list(hjust=0, x=0.01)))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
# tabular display of known targets
gp_known <- df_drug %>% inner_join(tibble(Symbol=V(subg)$name), by='Symbol') %>% mutate(pref_name_drug=str_replace_all(Drug,', ','\n')) %>% select(Symbol,pref_name_drug) %>% gridExtra::tableGrob(rows=NULL,cols=c('Target','Drugs [mechanisms of action] (development phase)'), theme=tt_right)
grid::grid.draw(gp_known)
# saved into a file 'PIC2_subg_known.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_known.pdf') %>% ggsave(gp_known, width=3.5, height=2)
# enrichment analysis of crosstalk genes in terms of Covid-19 known targets
set <- list(known=df_drug %>% pull(Symbol) %>% unique())
data <- V(subg)$name
background <- dTarget$priority %>% pull(name)
eset_known <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
eset_known %>% oSEAextract() %>% transmute(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
Figure 9.4: A tabular display of crosstalk genes that are also Covid-19 phase-III drug targets. Also displayed is the information on drug candidates and mechanisms of action.
ChEMBL_slim <- oRDS("ChEMBL_slim", placeholder=placeholder)
# enrichment analysis in terms of approved or phased targets (excluding Covid-19)
set <- ChEMBL_slim %>% filter(efo_term!='COVID-19', phase>=3) %>% select(Symbol,phase) %>% distinct() %>% mutate(phase=ifelse(phase==4,'approved','phased')) %>% nest(data=-phase) %>% mutate(data=map(data,~pull(.x))) %>% deframe()
data <- V(subg)$name
background <- dTarget$priority %>% pull(name)
eset <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
# gp_forest
gp_forest <- oSEAforest(eset, zlim=c(0,6), colormap='ggplot2.top', legend.direction="vertical", sortBy="none")
# repurposing
## dtt
dtt <- ChEMBL_slim %>% filter(efo_term!='COVID-19',phase>=3) %>% mutate(target_number=1) %>% mutate(pref_name_drug=detail) %>% as.data.frame()
## vec_data
df_data <- tibble(Symbol=V(subg)$name)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)
## DR
DR <- vec_data %>% oRepurpose(DTT=dtt, phase.min=3, reorder="none", colormap="spectral.top", zlim=c(1,4), label.size=3, label.color="white", x.rotate=80, size=5, x.text.size=7, y.text.size=7)
gp_DR <- DR$gp + guides(color="none") + scale_y_discrete(position="right")
# saved into a file 'PIC2_subg_repurpose.pdf' under the folder 'pic2_showcase'
gp_both <- gp_DR + (gp_forest + scale_x_continuous(position="top",limits=c(0,5)) + guides(size='none')) + plot_layout(widths=c(5,1), guides='collect')
file.path(outdir,'PIC2_subg_repurpose.pdf') %>% ggsave(gp_both, width=11, height=6)
# saved into a file 'PIC2_subg_repurpose_index.pdf' under the folder 'pic2_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_left <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=0,x=0.01)), colhead=list(fg_params=list(hjust=0, x=0.01)))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
if(0){
## one-column table
gp_index <- DR$index %>% mutate(Drug=str_replace_all(Drug,', ','\n')) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
file.path(outdir,'PIC2_subg_repurpose_index.pdf') %>% ggsave(gp_index, width=3.5, height=3.7)
grid::grid.draw(gp_index)
}else{
# two-column tables
tmp <- DR$index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(ceiling(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- DR$index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action] (development phase)'), theme=tt_right)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action] (development phase)'), theme=tt_right)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
file.path(outdir,'PIC2_subg_repurpose_index2.pdf') %>% ggsave(gp_table, width=9, height=4.5)
}
Figure 9.5: Dot plot showing crosstalk genes (y-axis) that are currently targeted by approved (phase-IV) and phase-III drugs in diseases other than Covid-19 (x-axis). Dots are colour-coded by maximum drug development phases and referenced in integers. Top-right panel: Forest plots of approved or phase-III drug targets enriched in crosstalk genes. The significance level (FDR), odds ratio and 95% confidence intervals (represented by lines) calculated according to one-sided Fisher’s exact test.
Figure 9.6: The information for referenced drugs and mechanisms of action.
Removal analysis is conducted to evaluate the effect of nodes on the crosstalk, done so: (i) removed individually (i.e. single-node removal), and (ii) removed in combination (e.g. combinatorial removal involving any two nodes). Removing nodes, if critical for the network, would result in a large fraction of nodes disconnected from the largest network component. These critical nodes are identified based on the optimal removal with the largest effect when removing single nodes or any two nodes in combination.
# df_removal
levels <- subg %>% oIG2TB('nodes') %>% arrange(name) %>% pull(name)
ls_df <- lapply(seq(3), function(i){
combn(levels,i,simplify=F) -> nodes.combine
df <- oAttack(subg, measure="combine", nodes.combine=nodes.combine)
df %>% mutate(i=i)
})
df_removal <- do.call(rbind, ls_df) %>% arrange(i,-frac.disconnected)
# an index of phase-III and above drugs (excluding Covid-19)
## df_target_phase_info
df_efo <- ChEMBL_slim %>% filter(efo_term!='COVID-19', phase>=3) %>% nest(data=efo_term) %>% mutate(efo_term=map_chr(data,~str_c(unlist(.x),collapse=' | '))) %>% select(-data)
df_target_phase_info <- df_efo %>% mutate(info=str_c(pref_name_drug, ' [', mechanism_of_action,']', ' (', efo_term, ')')) %>% select(Symbol, phase, info) %>% arrange(-phase, Symbol, info) %>% nest(data=info) %>% mutate(info=map_chr(data,~str_c(unlist(.x),collapse='; '))) %>% select(-data)
## df_pit
df_pit <- subg %>% oIG2TB('node') %>% transmute(Symbol=name, pit=as.numeric(priority))
# df_pid_single_pit
## df_pid_single
df_pid_single <- df_removal %>% filter(i==1) %>% transmute(Symbol=nodes.removed, pid=frac.disconnected) %>% left_join(df_target_phase_info, by='Symbol') %>% arrange(-pid, -phase)
## df_pid_single_pit
df_pid_single_pit <- df_pid_single %>% left_join(df_pit, by='Symbol') %>% arrange(-pid,-phase,-pit)
## write into a file 'PIC2_pid_single.txt' under the folder 'pic2_showcase'
df_pid_single_pit %>% write_delim(file.path(outdir,'PIC2_pid_single.txt'), delim='\t')
# gp_optimal (all single + two- or three-node optimal)
## df_single
df_single <- df_removal %>% filter(i==1) %>% arrange(-frac.disconnected)
## df_comb_best
df_comb_best <- df_removal %>% filter(i>1, i<=3) %>% group_by(i) %>% top_n(1, frac.disconnected) %>% ungroup()
## gp_optimal
data <- rbind(df_comb_best, df_single) %>% distinct() %>% arrange(i,frac.disconnected) %>% mutate(x=fct_inorder(nodes.removed)) %>% transmute(value=frac.disconnected, member=nodes.removed)
levels.customised <- data %>% separate_rows(member,sep=',') %>% pull(member) %>% unique()
gp <- data %>% oUpsetAdv(member.levels='customised',levels.customised=levels.customised, label.height.unit=6.5)
gp <- gp + scale_y_continuous(limits=NULL) + geom_point(aes(fill=as.factor(i)), shape=22, size=2, color="transparent") + geom_line(aes(group=i,color=as.factor(i))) + guides(color='none')
gp_optimal <- gp + ylab('Effect on the crosstalk measured \nby fraction of nodes disconnected\nupon node removal') + scale_fill_discrete(guide=guide_legend('Optimal combination',title.position="top",nrow=1)) + theme(legend.position="bottom", axis.title.y=element_text(size=8), axis.text.y=element_text(size=7))
## saved into a file 'PIC2_subg_removal_optimal.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_removal_optimal.pdf') %>% ggsave(gp_optimal, width=8, height=8)
# gp_highlight
## visualise the crosstalk highlighted by nodes
vec_highlight <- df_removal %>% group_by(i) %>% top_n(1, frac.disconnected) %>% ungroup() %>% separate_rows(nodes.removed, sep=',') %>% pull(nodes.removed) %>% unique()
ig <- subg
## degree
V(ig)$degree <- igraph::degree(ig)
## layout: xcoord + ycoord
ig <- ig %>% oLayout("graphlayouts.layout_with_stress")
## highlight: label + color
df_hightlight <- tibble(name=vec_highlight, flag=1)
df_hightlight_label <- tibble(name=V(ig)$name) %>% left_join(df_hightlight, by='name') %>% mutate(flag=ifelse(is.na(flag),0,flag)) %>% mutate(label=ifelse(flag==1,name,''))
V(ig)$label <- df_hightlight_label %>% pull(label)
V(ig)$color <- df_hightlight_label %>% pull(flag)
gp_highlight <- ig %>% oGGnetwork(node.label='label', node.label.size=3, node.label.color='black', node.label.alpha=1, node.label.padding=0.1, node.label.arrow=0, node.label.force=1, node.xcoord="xcoord", node.ycoord="ycoord", node.color="color", node.color.title='highlight', colormap="steelblue4-darkred", zlim=c(0,1), node.color.alpha=0.7, node.size='degree', node.size.title='degree', node.size.range=c(1.5,4.5), edge.color="steelblue4", edge.color.alpha=0.5, edge.size=0.3, edge.curve=0.05, edge.arrow.gap=0.01) + guides(color="none", size="none")
## saved into a file 'PIC2_subg_removal_highlight.pdf' under the folder 'pic2_showcase'
file.path(outdir,'PIC2_subg_removal_highlight.pdf') %>% ggsave(gp_highlight, width=8, height=6)
Figure 9.7: Effect of node removals, individually or in combination, on the crosstalk. Fraction of disconnected nodes (drug index) in the y-axis is plotted against node removal (indicated by colored circles beneath) in the x-axis. Notably, only the optimal removal with the largest effect is illustrated for two-node combinatorial removal.
The drug index (based on crosstalk genes removed individually) stored in the output file PIC2_pid_single.txt
above can be explored below.
Figure 9.8: Visualisation of the crosstalk, labelled for genes with contextual removals.