In this demo, the PIA
is applied to the asthma GWAS summary-level data, enabling genetics-led and network-driven discovery of shared and distinct drug targets for adult- and childhood-onset asthma.
# Please install R (version 4.2.1 or above)
# see https://cran.r-project.org
# if the package 'BiocManager' not installed, please do so
if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# first, install basic packages: remotes, tidyverse, pbapply, patchwork
BiocManager::install(c('remotes','tidyverse','pbapply','patchwork'), dependencies=T)
# then, install the package 'PIA' (now hosted at github)
BiocManager::install("hfang-bristol/PIA", dependencies=T, force=T)
# check the package 'PIA' successfully installed
library(help=PIA)
# load packages
library(PIA)
library(tidyverse)
library(patchwork)
# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_pia"
# create the output folder 'pia_showcase'
outdir <- 'pia_showcase'
if(!dir.exists(outdir)) dir.create(outdir)
# read GWAS summary data for asthma (European or British populations)
GWAScatalog <- oRDS("GWAScatalog", placeholder=placeholder)
input_data_asthma <- GWAScatalog %>% filter(pvalue<5e-8) %>% filter(str_detect(EF_name,"asthma")) %>% filter(str_detect(initial_sample_description,"European|British")) %>% transmute(trait=EF_name, snp=name, pvalue)
# ls_traits
ls_traits <- list()
## asthma (irrespective of onset ages)
ls_traits[[1]] <- input_data_asthma %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='asthma', snp, pvalue)
## adult onset asthma
ls_traits[[2]] <- input_data_asthma %>% filter(str_detect(trait,'adult onset asthma')) %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='adult onset asthma', snp, pvalue)
## childhood onset asthma
ls_traits[[3]] <- input_data_asthma %>% filter(str_detect(trait,'childhood onset asthma')) %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='childhood onset asthma', snp, pvalue)
## assign with names
names(ls_traits) <- c("asthma","adult onset asthma","childhood onset asthma")
# LD.customised
LD.customised <- oRDS('GWAS_LD.EUR', placeholder=placeholder) %>% as.data.frame()
# GR.SNP
#getOption("timeout"): 60
options(timeout=120) # please increase the timeout if failed to download
GR.SNP <- oRDS("dbSNP_Common", placeholder=placeholder)
# GR.Gene
GR.Gene <- oRDS("UCSC_knownGene", placeholder=placeholder)
df_GR_gene <- GR.Gene %>% as.data.frame() %>% as_tibble()
# include.RGB (53)
include.RGB <- c("PCHiC_PMID27863249_Activated_total_CD4_T_cells","PCHiC_PMID27863249_Endothelial_precursors","PCHiC_PMID27863249_Erythroblasts","PCHiC_PMID27863249_Fetal_thymus","PCHiC_PMID27863249_Macrophages_M0","PCHiC_PMID27863249_Macrophages_M1","PCHiC_PMID27863249_Macrophages_M2","PCHiC_PMID27863249_Megakaryocytes","PCHiC_PMID27863249_Monocytes","PCHiC_PMID27863249_Naive_B_cells","PCHiC_PMID27863249_Naive_CD4_T_cells","PCHiC_PMID27863249_Naive_CD8_T_cells","PCHiC_PMID27863249_Neutrophils","PCHiC_PMID27863249_Nonactivated_total_CD4_T_cells","PCHiC_PMID27863249_Total_B_cells","PCHiC_PMID27863249_Total_CD4_T_cells","PCHiC_PMID27863249_Total_CD8_T_cells", "PCHiC_PMID31501517_AdrenalGland","PCHiC_PMID31501517_Aorta","PCHiC_PMID31501517_Bladder","PCHiC_PMID31501517_Cardiomyocytes","PCHiC_PMID31501517_DorsolateralPrefrontalCortex","PCHiC_PMID31501517_Esophagus","PCHiC_PMID31501517_Fat","PCHiC_PMID31501517_Fibroblast","PCHiC_PMID31501517_Gastric","PCHiC_PMID31501517_H1","PCHiC_PMID31501517_H1MesenchymalStemCell","PCHiC_PMID31501517_H1MesendodermStemCell","PCHiC_PMID31501517_H1NeuralProgenitorCell","PCHiC_PMID31501517_Hippocampus","PCHiC_PMID31501517_LCL","PCHiC_PMID31501517_LeftHeartVentricle","PCHiC_PMID31501517_Liver","PCHiC_PMID31501517_Lung","PCHiC_PMID31501517_Ovary","PCHiC_PMID31501517_Pancreas","PCHiC_PMID31501517_Psoas","PCHiC_PMID31501517_RightHeartVentricle","PCHiC_PMID31501517_RightHeatAtrium","PCHiC_PMID31501517_SigmoidColon","PCHiC_PMID31501517_SmallBowel","PCHiC_PMID31501517_Spleen","PCHiC_PMID31501517_Thymus","PCHiC_PMID31501517_Trophoblast", "PCHiC_PMID31367015_astrocytes","PCHiC_PMID31367015_excitatory","PCHiC_PMID31367015_hippocampal","PCHiC_PMID31367015_motor", "PCHiC_PMID31253982_islet","PCHiC_PMID29955040_CMhESC", "PCHiC_PMID25938943_CD34","PCHiC_PMID25938943_GM12878")
# include.QTL (114)
include.QTL <- c("eQTL_eQTLGen", "pQTL_Plasma")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNg","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNgSalmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_Salmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_naive","eQTL_eQTLCatalogue_BLUEPRINT_Tcell","eQTL_eQTLCatalogue_BLUEPRINT_monocyte","eQTL_eQTLCatalogue_BLUEPRINT_neutrophil","eQTL_eQTLCatalogue_BrainSeq_brain","eQTL_eQTLCatalogue_CEDAR_microarray_Bcell_CD19","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD4","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD8","eQTL_eQTLCatalogue_CEDAR_microarray_ileum","eQTL_eQTLCatalogue_CEDAR_microarray_monocyte_CD14","eQTL_eQTLCatalogue_CEDAR_microarray_neutrophil_CD15","eQTL_eQTLCatalogue_CEDAR_microarray_platelet","eQTL_eQTLCatalogue_CEDAR_microarray_rectum","eQTL_eQTLCatalogue_CEDAR_microarray_transverse_colon","eQTL_eQTLCatalogue_FUSION_adipose_naive","eQTL_eQTLCatalogue_FUSION_muscle_naive","eQTL_eQTLCatalogue_Fairfax_2012_microarray_Bcell_CD19","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_IFN24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS2","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_naive","eQTL_eQTLCatalogue_GENCORD_LCL","eQTL_eQTLCatalogue_GENCORD_Tcell","eQTL_eQTLCatalogue_GENCORD_fibroblast","eQTL_eQTLCatalogue_GEUVADIS_LCL")
include.QTL <- c(include.QTL,"eQTL_eQTLCatalogue_GTEx_V8_Adipose_Subcutaneous","eQTL_eQTLCatalogue_GTEx_V8_Adipose_Visceral_Omentum","eQTL_eQTLCatalogue_GTEx_V8_Adrenal_Gland","eQTL_eQTLCatalogue_GTEx_V8_Artery_Aorta","eQTL_eQTLCatalogue_GTEx_V8_Artery_Coronary","eQTL_eQTLCatalogue_GTEx_V8_Artery_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Brain_Amygdala","eQTL_eQTLCatalogue_GTEx_V8_Brain_Anterior_cingulate_cortex_BA24","eQTL_eQTLCatalogue_GTEx_V8_Brain_Caudate_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellar_Hemisphere","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellum","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Brain_Frontal_Cortex_BA9","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hippocampus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hypothalamus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Nucleus_accumbens_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Putamen_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Spinal_cord_cervical_c1","eQTL_eQTLCatalogue_GTEx_V8_Brain_Substantia_nigra","eQTL_eQTLCatalogue_GTEx_V8_Breast_Mammary_Tissue","eQTL_eQTLCatalogue_GTEx_V8_Cells_Cultured_fibroblasts","eQTL_eQTLCatalogue_GTEx_V8_Cells_EBVtransformed_lymphocytes","eQTL_eQTLCatalogue_GTEx_V8_Colon_Sigmoid","eQTL_eQTLCatalogue_GTEx_V8_Colon_Transverse","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Gastroesophageal_Junction","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Mucosa","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Muscularis","eQTL_eQTLCatalogue_GTEx_V8_Heart_Atrial_Appendage","eQTL_eQTLCatalogue_GTEx_V8_Heart_Left_Ventricle","eQTL_eQTLCatalogue_GTEx_V8_Kidney_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Liver","eQTL_eQTLCatalogue_GTEx_V8_Lung","eQTL_eQTLCatalogue_GTEx_V8_Minor_Salivary_Gland","eQTL_eQTLCatalogue_GTEx_V8_Muscle_Skeletal","eQTL_eQTLCatalogue_GTEx_V8_Nerve_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Ovary","eQTL_eQTLCatalogue_GTEx_V8_Pancreas","eQTL_eQTLCatalogue_GTEx_V8_Pituitary","eQTL_eQTLCatalogue_GTEx_V8_Prostate","eQTL_eQTLCatalogue_GTEx_V8_Skin_Not_Sun_Exposed_Suprapubic","eQTL_eQTLCatalogue_GTEx_V8_Skin_Sun_Exposed_Lower_leg","eQTL_eQTLCatalogue_GTEx_V8_Small_Intestine_Terminal_Ileum","eQTL_eQTLCatalogue_GTEx_V8_Spleen","eQTL_eQTLCatalogue_GTEx_V8_Stomach","eQTL_eQTLCatalogue_GTEx_V8_Testis","eQTL_eQTLCatalogue_GTEx_V8_Thyroid","eQTL_eQTLCatalogue_GTEx_V8_Uterus","eQTL_eQTLCatalogue_GTEx_V8_Vagina","eQTL_eQTLCatalogue_GTEx_V8_Whole_Blood")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_HipSci_iPSC","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD4","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD8","eQTL_eQTLCatalogue_Lepik_2017_blood","eQTL_eQTLCatalogue_Naranbhai_2015_microarray_neutrophil_CD16","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Listeria","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Salmonella","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_naive","eQTL_eQTLCatalogue_Quach_2016_monocyte_IAV","eQTL_eQTLCatalogue_Quach_2016_monocyte_LPS","eQTL_eQTLCatalogue_Quach_2016_monocyte_Pam3CSK4","eQTL_eQTLCatalogue_Quach_2016_monocyte_R848","eQTL_eQTLCatalogue_Quach_2016_monocyte_naive","eQTL_eQTLCatalogue_ROSMAP_brain_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Bcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_NKcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Tfh_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th117_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th17_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th1_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th2_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_CD16_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_naive","eQTL_eQTLCatalogue_Schwartzentruber_2018_sensory_neuron","eQTL_eQTLCatalogue_TwinsUK_LCL","eQTL_eQTLCatalogue_TwinsUK_blood","eQTL_eQTLCatalogue_TwinsUK_fat","eQTL_eQTLCatalogue_TwinsUK_skin","eQTL_eQTLCatalogue_van_de_Bunt_2015_pancreatic_islet")
# network.customised
network.customised <- oDefineNet(network="STRING_high", STRING.only=c("experimental_score","database_score"), placeholder=placeholder)
input_data <- ls_traits$`asthma` %>% select(snp,pvalue) %>% as.data.frame()
# prepare predictors (it takes 16 min or so)
ls_pNode_genomic <- oPierSNPsAdv(data=input_data, LD.customised=LD.customised, significance.threshold=5e-8, score.cap=100, distance.max=20000, decay.kernel="constant", GR.SNP=GR.SNP, GR.Gene=GR.Gene, include.QTL=include.QTL, include.RGB=include.RGB, network.customised=network.customised, placeholder=placeholder, verbose.details=F, verbose=F, parallel=F)
# save into a file 'PIA.ls_pNode_genomic.RData'
save(list='ls_pNode_genomic', file='PIA.ls_pNode_genomic.RData')
# do prioritisation
ls_pNode <- Filter(Negate(is.null), ls_pNode_genomic)
dTarget <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy="orderStatistic", keep=F, placeholder=placeholder)
# write into a file 'PIA_priority.txt.gz' under the folder 'pia_showcase'
dTarget$priority %>% select(name,rating,rank,seed,nGene,eGene,cGene,description) %>% write_delim(file.path(outdir,'PIA_priority.txt.gz'), delim='\t')
# save into a file 'PIA.dTarget.RData' under the folder 'pia_showcase'
save(list='dTarget', file=file.path(outdir,'PIA.dTarget.RData'))
Target genes stored in the output file PIA_priority.txt.gz
above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating
).
# top 1 per chromosome
dT_tmp <- dTarget
dT_tmp$priority <- dTarget$priority %>% top_frac(0.5,rating)
top.label.query <- dT_tmp$priority %>% inner_join(df_GR_gene, by=c('name'='Symbol')) %>% group_by(seqnames) %>% top_n(1,rating) %>% pull(name)
label.query.only <- F
gp_manhattan_chr <- oPierManhattan(dT_tmp, color=c("darkred", "steelblue4"), point.size=0.2, top=100000, top.label.type="text", top.label.size=2, top.label.col="black", top.label.query=top.label.query, label.query.only=label.query.only, y.lab="Priority index", signature=F, placeholder=placeholder)
# saved into a file 'PIA_manhattan_chr.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_manhattan_chr.pdf'), gp_manhattan_chr, width=8, height=3)
Figure 4.1: Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top prioritised gene named per chromosome (x-axis).
# GSP (phase 2 or above drug targets for asthma)
# GSN (simulated negative targets)
ig.EF.ChEMBL <- oRDS('ig.EF.ChEMBL', placeholder=placeholder)
ind <- str_detect(V(ig.EF.ChEMBL)$name,'asthma') %>% which()
GSP <- V(ig.EF.ChEMBL)$GSP[[ind]]
GSN <- V(ig.EF.ChEMBL)$GSN[[ind]]
vec_methods <- c("fishers","orderStatistic","max","sum")
names(vec_methods) <- vec_methods
ls_dT_methods <- pbapply::pblapply(vec_methods, function(x){
if(x %in% c("max","sum","harmonic")){
dT <- oPierMatrix(ls_pNode, displayBy="score", aggregateBy=x, keep=F, placeholder=placeholder, verbose=F)
}else if(x %in% c("fishers","logistic","Ztransform","orderStatistic")){
dT <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy=x, keep=F, placeholder=placeholder, verbose=F)
}
})
ls_pPerf <- lapply(ls_dT_methods, function(x){
if(is(x,"pNode")){
prediction <- x$priority %>% select(name,priority)
}else if(is(x,"dTarget")){
prediction <- x$priority %>% select(name,rating)
}
pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})
# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_methods <- gp + coord_fixed()
ChEMBL_v30 <- oRDS("ChEMBL_v30", placeholder=placeholder)
# naive prediction: by the frequency of approved drugs targeting a gene
data_naive <- ChEMBL_v30 %>% filter(target_number<=5, phase>=4) %>% select(chembl_id_drug,Symbol) %>% distinct() %>% count(Symbol) %>% arrange(-n) %>% as.data.frame()
# pNode_naive
pNode_naive <- oPierGenes(data_naive, network.customised=network.customised, restart=1, placeholder=placeholder)
# From Open Targets to download: ASM_MONDO_0004979-associated-diseases.tsv
opentargets_file <- list.files(path=outdir, pattern=str_c("^ASM_.*.tsv"), full.names=T)
df <- read_delim(opentargets_file, delim='\t', col_types=cols_only(symbol='c',overallAssociationScore='d',geneticAssociations='d',somaticMutations='d',drugs='d',pathwaysSystemsBiology='d',textMining='d',rnaExpression='d',animalModels='d',targetName='c'))
vec <- sapply(1:nrow(df), function(i){
x <- df[i,c(3,4,6:9)] %>% as.data.frame()
x[is.na(x)] <- 0
sum(x / rank(-x,ties.method="min")^2)
})
df <- df %>% mutate(opentarget=vec)
## data_nest
data_nest <- df %>% select(-targetName) %>% pivot_longer(-symbol, names_to='evidence', values_to='score') %>% filter(!is.na(score)) %>% nest(data=-evidence)
## ls_pNode_opentarget
ls_pNode_opentarget <- data_nest %>% mutate(pNode=map(data, function(x){
oPierGenes(x, network.customised=network.customised, restart=1, placeholder=placeholder, verbose=F)
})) %>% select(-data) %>% deframe()
## do evaluation
ls_pNode_opentarget_selected <- ls_pNode_opentarget[c("geneticAssociations","somaticMutations","pathwaysSystemsBiology" ,"textMining","animalModels","rnaExpression","opentarget")[c(1)]]
# do evaluation
ls_dT_selected <- c(ls_dT_methods, list(`Naive`=pNode_naive), ls_pNode_opentarget_selected)
ls_pPerf <- lapply(ls_dT_selected, function(x){
if (is(x,"pNode")){
prediction <- x$priority %>% select(name,priority)
}else if(is(x,"dTarget")){
prediction <- x$priority %>% select(name,rating)
}
pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})
# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_auc <- gp + coord_fixed()
# saved into a file 'PIA_auc.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_auc.pdf'), gp_auc, width=7, height=4)
# only for PIA (ROC + PR)
pPerf_ROC <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="ROC", highlight=T)
pPerf_PR <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="PR", highlight=T)
gp_both <- list(pPerf_ROC$gp+coord_fixed(), pPerf_PR$gp+coord_fixed()) %>% wrap_plots(ncol=2, guides='collect')
# saved into a file 'PIA_auc_pr.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_auc_pr.pdf'), gp_both, width=6, height=4)
Figure 5.1: AUC plot. FN, false negatives; FP, false positives; TN, true negatives; TP: true positives. The performance is compared with different rating schemes: ‘orderStatistic’ using an order statistic meta-analysis method to combine dataset-specific predictors, ‘fishers’ using a Fisher’s combined method, two conventional ones [‘sum’ (additively summing up across predictors) and ‘max’ (taking the maximum across predictors)], ‘Naive’ for naive rating (the frequency of being therapeutically targeted by existing licensed/approved drugs, which is motivated by drug repurposing), and ‘geneticAssociation’ for Open Targets - Genetics.
# KEGG: Asthma - Homo sapiens (human)
# https://www.genome.jp/pathway/hsa05310
ig.KEGG.individual <- oRDS('ig.KEGG.individual', placeholder=placeholder)
g <- ig.KEGG.individual %>% filter(pathway=='Asthma') %>% deframe()
g_path <- g[[1]]
# Enrichment analysis of the top 1% prioritised genes in terms of asthma known pathway genes
set <- list()
set$known <- V(g_path)$name
data <- dTarget$priority %>% top_frac(0.01,rating) %>% pull(name)
background <- dTarget$priority %>% pull(name)
eset_kegg <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
eset_kegg %>% oSEAextract() %>% transmute(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
ChEMBL_v30 <- oRDS("ChEMBL_v30", placeholder=placeholder)
# df_target_known
df_target_known <- ChEMBL_v30 %>% filter(efo_term=='asthma') %>% filter(target_number<=5, phase>=2) %>% select(Symbol,pref_name_drug,phase,mechanism_of_action,efo_term) %>% arrange(-phase,Symbol) %>% distinct()
# phase3above: phase-III or above targes for Asthma
phase3above <- df_target_known %>% filter(phase>=3) %>% pull(Symbol) %>% unique()
# phase2above: phase-II or above targes for Asthma
phase2above <- df_target_known %>% filter(phase>=2) %>% pull(Symbol) %>% unique()
# do leading prioritisation analysis (LPA)
customised.genesets <- list(phase3above=phase3above, phase2above=phase2above)
df <- dTarget$priority %>% top_frac(0.1,rating) %>% transmute(priority=rating, rank=rank)
set.seed(825)
eLPA <- oPierGSEA(df, customised.genesets=customised.genesets, size.range=c(15,5000), nperm=50000)
# df_summary_leading
## df_summary
df_summary <- eLPA$df_summary %>% mutate(frac=nLead/nAnno) %>% filter(frac<=1) %>% arrange(tolower(setID))
## df_leading
df_leading <- eLPA$leading[df_summary %>% pull(setID)] %>% enframe() %>% transmute(setID=name,member=value) %>% mutate(member_rank=map_chr(member, function(x) str_c(names(x),' (',x,')', collapse=', ')))
## df_summary_leading
df_summary_leading <- df_summary %>% inner_join(df_leading, by='setID')
top <- 'phase2above'
# leading plot
gp_leading <- oGSEAdotplot(eLPA, top=top, title='setID', subtitle='none', compact=T, colormap="spectral", zlim=c(0,5), peak.color='black', clab='Priority rating', signature=FALSE)
# leading plot (core)
leading.query <- dTarget$priority %>% top_frac(1,rating) %>% pull(name)
gp_leading_core <- oGSEAdotplot(eLPA, top=top, x.scale="normal", leading=T, leading.edge.only=T, leading.query.only=T, leading.query=leading.query, leading.size=2, leading.color='steelblue4', leading.alpha=0.95, colormap="spectral", zlim=c(0,5), peak.color='black', clab='Priority rating', signature=FALSE)
# gp_both
gp_both <- list(gp_leading_core, gp_leading) %>% wrap_plots(ncol=2, widths=c(2,1), guides=c('keep','collect')[2])
# saved into a file 'PIA_leading.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_leading.pdf'), gp_both, width=9, height=4.5)
# df_target_recovered
df <- df_summary_leading %>% filter(setID=='phase2above') %>% select(member) %>% deframe() %>% unlist() %>% names() %>% as_tibble() %>% set_names('Symbol') %>% inner_join(dTarget$priority %>% transmute(Symbol=name,rank,rating))
df_target_recovered <- ChEMBL_v30 %>% filter(efo_term=='asthma') %>% filter(target_number<=5) %>% filter(phase>=2) %>% inner_join(df, by='Symbol') %>% transmute(efo_term, phase, pref_name_drug=tolower(pref_name_drug), mechanism_of_action, GeneID, Symbol, rank, rating) %>% arrange(phase,pref_name_drug,rank)
# write into a file 'PIA_leading_recovered.txt' under the folder 'pia_showcase'
df_target_recovered %>% write_delim(file.path(outdir,'PIA_leading_recovered.txt'), delim='\t')
# HEB
library(ggraph)
df_xyw <- df_target_recovered %>% mutate(label=ifelse(phase==2,'phase-II', ifelse(phase==3,'phase-III','approved'))) %>% transmute(ynode=Symbol, xnode=str_c(pref_name_drug,' (',label,')'), weight=phase) %>% arrange(weight,ynode) %>% distinct()
big <- df_xyw %>% pivot_wider(names_from=xnode, values_from=weight, values_fill=list(weight=0)) %>% arrange(ynode) %>% column_to_rownames("ynode") %>% oBicreate()
V(big)$community <- V(big)$type
E(big)$weight <- 1
V(big)$size <- igraph::degree(big)
gp_heb <- oHEB(big, edge.alpha=0.5, edge.width=0.6, leave.label.expansion=1.01, edge.tension=0.7)
gp_heb <- gp_heb + guides(color='none', size='none') + scale_edge_colour_gradientn(limits=c(1,4), colors=oColormap('spectral')(64), guide=guide_edge_colorbar("Phase",barwidth=unit(8,'pt')))
## saved into a file 'PIA_leading_recovered_heb.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_leading_recovered_heb.pdf'), gp_heb, width=9.5, height=9.5)
Figure 7.1: Leading prioritisation plot for asthma phase-II or above therapeutic targets. Leading prioritisation analysis was performed to quantify the degree to which asthma phase-II or above therapeutic targets are enriched at the leading prioritisation. The leading prioritisation is defined as the core subset of prioritised target genes accounting for the enrichment signal, which can be visualised as the left-most region of the peak in running enrichment plot (middle panel). Asthma phase-II or above therapeutic targets recovered at the leading prioritisation (zoomed in left panel) are named and indicated in vertical lines (also color-coded by priority rating).
Target genes stored in the output file PIA_leading_recovered.txt
above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating
).
Figure 7.2: Hierarchical edge bundling visualises leading prioritised genes (in pink) that are already targeted by existing asthma phase-II or above drugs (in cyan). Nodes are sized by degree (the number of neighbors).
# per target, keep the maximum phased drugs only (along with disease indications)
ChEMBL_v30_DTT <- oRDS("ChEMBL_v30_DTT", placeholder=placeholder)
ChEMBL_v30_DTT <- ChEMBL_v30_DTT %>%
filter(!(efo_term %in% c('cancer','neoplasm','immune system disease'))) %>%
filter(!str_detect(efo_term,'neoplasm')) %>%
filter(!str_detect(efo_term,'cancer')) %>%
mutate(efo_term=str_replace_all(efo_term,'\\{http.*','')) %>%
mutate(efo_term=str_to_title(efo_term)) %>%
mutate(pref_name_drug=str_replace_all(pref_name_drug, ',', '')) %>%
mutate(pref_name_drug=str_to_lower(pref_name_drug)) %>%
mutate(pref_name_drug=str_c(pref_name_drug,' [',mechanism_of_action,']'))
# dtt: excluding 'asthma' but all approved
dtt <- ChEMBL_v30_DTT %>% filter(efo_term!='asthma', phase>=4) %>% mutate(target_number=1) %>% as.data.frame()
# vec_recovered_repurpose
vec_recovered_repurpose <- dtt %>% semi_join(df_target_recovered %>% distinct(Symbol), by='Symbol') %>% pull(Symbol) %>% unique()
# Enrichment analysis of Asthma phase-II or above drug targets recovered on leading prioritisation analysis in terms of all approved drug targets
set <- list()
set$known <- dtt$Symbol %>% unique()
data <- df_target_recovered %>% pull(Symbol) %>% unique()
background <- dTarget$priority %>% pull(name)
eset_known <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)
eset_known %>% oSEAextract() %>% select(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()
# it takes 47 min
ls_dTarget <- pbapply::pblapply(seq(length(ls_traits)), function(i){
message(sprintf("%d ('%s') (%s) ...", i, names(ls_traits)[i], as.character(Sys.time())), appendLF=TRUE)
data <- ls_traits[[i]] %>% select(snp, pvalue) %>% as.data.frame()
## first, genomic predictors
ls_pN_genomic <- oPierSNPsAdv(data, LD.customised=LD.customised, significance.threshold=5e-8, score.cap=100, distance.max=20000, decay.kernel="constant", GR.SNP=GR.SNP, GR.Gene=GR.Gene, include.QTL=include.QTL, include.RGB=include.RGB, network.customised=network.customised, placeholder=placeholder, verbose.details=F, verbose=F)
## Prioritisation
ls_pN <- base::Filter(base::Negate(is.null), ls_pN_genomic)
dT <- oPierMatrix(ls_pN, displayBy="pvalue", aggregateBy="orderStatistic", keep=F, placeholder=placeholder)
})
names(ls_dTarget) <- names(ls_traits)
# save into a file 'PIA.ls_dTarget.RData' under the folder 'pia_showcase'
save(list='ls_dTarget', file=file.path(outdir,'PIA.ls_dTarget.RData'))
ls_dTarget_selected <- ls_dTarget[c('adult onset asthma', 'childhood onset asthma')]
ls_gp_manhattan <- pbapply::pblapply(seq(length(ls_dTarget_selected)), function(i){
dT_tmp <- ls_dTarget_selected[[i]]
dT_tmp$priority <- dT_tmp$priority %>% top_frac(0.45,rating)
top.label.query <- dT_tmp$priority %>% inner_join(df_GR_gene, by=c('name'='Symbol')) %>% group_by(seqnames) %>% top_n(1,rating) %>% pull(name)
label.query.only <- F
y.lab <- str_c("Priority index\n(", names(ls_dTarget_selected)[i], ")")
gp_manhattan_chr <- oPierManhattan(dT_tmp, color=c("darkred", "steelblue4"), point.size=0.2, top=100000, top.label.type="text", top.label.size=2, top.label.col="black", top.label.query=top.label.query, label.query.only=label.query.only, y.lab=y.lab, signature=F, placeholder=placeholder)
})
# saved into a file 'PIA_onset_manhattan_chr.pdf' under the folder 'pia_showcase'
gp_manhattan_combined <- ls_gp_manhattan %>% wrap_plots(ncol=1, guides='collect')
ggsave(file.path(outdir,'PIA_onset_manhattan_chr.pdf'), gp_manhattan_combined, width=8, height=5)
Figure 8.1: Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top prioritised gene named per chromosome (x-axis). Top: adult-onset asthma; Bottom: children-onset asthma.
# ls_vec
ls_vec <- lapply(ls_dTarget_selected, function(dT){
dT$priority %>% top_frac(0.01,rating) %>% pull(name)
})
# sets
sets <- tibble(onto=c('MsigdbH')) %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
# esad
esad <- oSEAadv(ls_vec, sets, size.range=c(10,1500), test="fisher", min.overlap=5)
# oSEAggraph
ig <- oRDS("ig.MsigdbH", placeholder=placeholder)
ig %>% oIG2TB('nodes') %>% tail() %>% print(width=Inf)
V(ig)$label <- V(ig)$name %>% str_replace_all('HALLMARK_','') %>% str_replace_all('_',' ') %>% str_to_sentence()
gp <- esad %>% oSEAextract() %>% filter(adjp<0.05) %>% oSEAggraph(ig, fixed=T, node.color="adjp", colormap="grey90-orange-darkred", zlim=c(0,15), node.size="zscore",slim=c(0,15),node.size.range=c(0.5,4.5),node.label.size=2.5, node.label.direction=c('none','leftright','topbottom')[2], layout=c('stress','dendrogram')[2], leave=F, edge.color='steelblue', edge.alpha=0.2, edge.width=0.5)
# saved into a file 'PIA_onset_hallmark.pdf' under the folder 'pia_showcase'
gp_hallmark <- list(gp$gp_template, gp) %>% wrap_plots(ncol=1, heights=c(1,1), guides='collect')
ggsave(file.path(outdir,'PIA_onset_hallmark.pdf'), gp_hallmark, width=8, height=7.5)
Figure 8.2: Circular overview of molecular hallmarks (top panel), with enrichments shown for adult-onset asthma (bottom-left panel) and childhood-onset asthma (bottom-right panel) where nodes are sized by the enrichment z-score and colored by the enrichment significance (FDR).
ig.KEGG.category <- oRDS('ig.KEGG.category', placeholder=placeholder)
ig <- ig.KEGG.category %>% filter(category %in% c("Environmental Process","Organismal System")) %>% deframe() %>% oCombineNet()
ig2 <- oNetInduce(ig, nodes_query=V(ig)$name, largest.comp=T) %>% as.undirected()
ls_subg_perm <- pbapply::pblapply(seq(length(ls_dTarget_selected)), function(i){
message(sprintf("%d ('%s') (%s) ...", i, names(ls_dTarget_selected)[i], as.character(Sys.time())), appendLF=TRUE)
#subg <- oPierSubnet(ls_dTarget_selected[[i]], network.customised=ig2, subnet.size=50)
subg_perm <- oPierSubnet(ls_dTarget_selected[[i]], network.customised=ig2, subnet.size=50, test.permutation=T, num.permutation=10, respect="degree", aggregateBy="fishers")
})
names(ls_subg_perm) <- names(ls_dTarget_selected)
# save into a file 'PIA.ls_subg_perm.RData' under the folder 'pia_showcase'
save(list='ls_subg_perm', file=file.path(outdir,'PIA.ls_subg_perm.RData'))
# extract pathway crosstalk genes
ls_df_subg <- pbapply::pblapply(seq(length(ls_subg_perm)), function(i){
message(sprintf("%d ('%s') (%s) ...", i, names(ls_subg_perm)[i], as.character(Sys.time())), appendLF=TRUE)
dT <- ls_dTarget[[i]]
subg <- ls_subg_perm[[i]]
df_subg <- dT$priority %>% as_tibble() %>% mutate(subtype=names(ls_subg_perm)[i]) %>% select(subtype,name,rating,rank,seed,nGene,eGene,cGene,description) %>% semi_join(tibble(name=V(subg)$name), by='name')
})
df_subg <- do.call(rbind, ls_df_subg)
# write into a file 'PIA_onset_subg.txt' under the folder 'pia_showcase'
df_subg %>% write_delim(file.path(outdir,'PIA_onset_subg.txt'), delim='\t')
Pathway crosstalk genes stored in the output file PIA_onset_subg.txt
above can be explored below.
ls_gp_subg <- pbapply::pblapply(seq(length(ls_subg_perm)), function(i){
ig <- ls_subg_perm[[i]]
dTarget <- ls_dTarget_selected[[i]]
# degree
V(ig)$degree <- igraph::degree(ig)
# layout: xcoord + ycoord
ig <- ig %>% oLayout("graphlayouts.layout_with_stress")
# label
V(ig)$label <- tibble(name=V(ig)$name) %>% inner_join(dTarget$priority, by='name') %>% mutate(label=str_c(name)) %>% pull(label)
# gp_subg
gp_subg <- ig %>% oGGnetwork(node.label='label', node.label.size=2.5, node.label.color='black', node.label.alpha=1, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.05, node.xcoord="xcoord", node.ycoord="ycoord", node.color="priority", node.color.title='Priority rating', colormap="spectral", zlim=c(0,5), node.color.alpha=0.95, node.size='degree', node.size.title='degree', node.size.range=c(1.5,4.5), edge.color="steelblue4", edge.color.alpha=0.2,edge.size=0.3,edge.curve=0.05,edge.arrow.gap=0.01)
gp_subg + guides(size="none")
})
names(ls_gp_subg) <- names(ls_subg_perm)
# saved into a file 'PIA_onset_subg.pdf' under the folder 'pia_showcase'
gp_subg <- ls_gp_subg %>% wrap_plots(ncol=1, heights=c(1,1), guides='collect')
ggsave(file.path(outdir,'PIA_onset_subg.pdf'), gp_subg, width=7, height=8)
Figure 8.3: Visualisations of pathway crosstalks are illustrated separately for adult-onset asthma (top panel) and childhood-onset asthma (bottom panel), each identified by integrating the target priority rating information with pathway-derived gene interactions (merged from KEGG pathways).
mat_priority <- df_subg %>% select(trait, name, rating) %>% pivot_wider(names_from=trait, values_from=rating)
mat_priority <- mat_priority %>% mutate(code=str_c(0+!is.na(`adult onset asthma`), 0+!is.na(`childhood onset asthma`))) %>% arrange(desc(code),name)
# write into a file 'PIA_onset_heatmap.txt' under the folder 'pic2_showcase'
mat_priority %>% write_delim(file.path(outdir,'PIA_onset_heatmap.txt'), delim='\t')
# heatmap
data <- mat_priority %>% select(-code) %>% column_to_rownames('name')
gp_heatmap <- oHeatmap(data, zlim=c(0,5), colormap="spectral", x.rotate=45, x.text.size=7, y.text.size=7, legend.title="Priority rating", legend.text.size=6, legend.title.size=8, shape=19, size=2.5)
# saved into a file 'PIA_onset_heatmap.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_onset_heatmap.pdf'), gp_heatmap, width=2, height=8)
Heatmap in the output file PIA_onset_heatmap.txt
above can be explored below.
Figure 8.4: Heatmap illustrates pathway crosstalk genes for adult-onset asthma (the 1st column) and childhood-onset asthma (the 2nd column), with genes divided into three groups: shared, specific to adult-onset asthma, and specific to childhood-onset asthma.
# enrichment analysis
## ls_code
ls_code <- mat_priority %>% select(name,code) %>% nest(data=-code) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
## background
background <- mat_priority %>% pull(name)
## sets
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(ls_code, sets, background=NULL, size.range=c(10,500), min.overlap=10)
# balloon plot
## top two enrichments per module
gp <- esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% oSEAballoon(top=5, adjp.cutoff=0.05, zlim=NULL, slim=NULL, size.range=c(1,6), shape=19, colormap="grey90-orange-darkred")
gp_kegg <- gp + theme(axis.text.y=element_text(size=6),axis.text.x=element_text(size=6,angle=0),strip.text.y=element_text(size=6,angle=0))
# save into a file 'PIA_onset_kegg.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'PIA_onset_kegg.pdf'), gp_kegg, width=4.5, height=2)
# write into a file 'PIA_onset_kegg.txt' under the folder 'pipe_showcase'
esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% arrange(group,adjp) %>% write_delim(file.path(outdir,'PIA_onset_kegg.txt'), delim='\t')
Enriched pathways stored in the output file PIA_onset_kegg.txt
above can be explored below.
Figure 8.5: KEGG pathway enrichment analysis for genes within each of three groups coded as ‘11’ (shared), ‘10’ (specific to adult-onset asthma), and ‘01’ (specific to childhood-onset asthma). Enrichment analysis was based on one-sided Fisher’s exact test.
# ontology annotation: two categories 'approved' and 'phased' (phases 2 and 3)
set <- ChEMBL_v30_DTT %>% filter(phase>=2) %>%
select(Symbol,phase) %>% arrange(-phase) %>%
distinct(Symbol, .keep_all=T) %>% filter(phase>=2) %>%
mutate(phase=ifelse(phase==4,'approved','phased')) %>%
nest(data=-phase) %>% mutate(data=map(data,~pull(.x))) %>%
deframe()
# enrichment analysis
ls_vec <- lapply(ls_dTarget_selected, function(dT){
dT$priority %>% pull(name)
})
background <- unlist(ls_vec) %>% unique()
ls_res <- lapply(1:length(ls_code), function(i){
eset <- oSEA(ls_code[[i]], set, background=background, size.range=c(10,20000), min.overlap=1)
eset %>% oSEAextract() %>% mutate(group=names(ls_code)[i])
})
df_res <- do.call(rbind, ls_res)
# forest plot
df_res <- df_res %>% filter(name %in% c('approved','phased')) %>% mutate(adjp=p.adjust(pvalue,method="BH"))
gp_forest <- df_res %>% oSEAforest(adjp.cutoff=1, zlim=NULL, colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="or")
# save into a file 'PIA_onset_repurpose.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_onset_repurpose.pdf'), gp_forest, width=7, height=2)
# write into a file 'PIA_onset_repurpose.txt' under the folder 'pipe_showcase'
df_res %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'PIA_onset_repurpose.txt'), delim='\t')
Enriched approved or phased drug target categories stored in the output file PIA_onset_repurpose.txt
above can be explored below.
Figure 8.6: Forest plot of approved or phased drug target enrichments. Approved and phased drug targets are sourced from ChEMBL.
Drug repurposing dotplot for childhood-onset-specific crosstalk genes (coded 01
)
# dtt
dtt <- ChEMBL_v30_DTT %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()
# vec_data
df_data <- tibble(Symbol=ls_code$`01`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)
# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=60, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)
# write into a file 'PIA_onset_repurpose_01.txt' under the folder 'pia_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'PIA_onset_repurpose_01.txt'), delim='\t')
# save into a file 'PIA_onset_repurpose_01.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right",limits=rev)+ scale_x_discrete(position="top",limits=rev) + coord_flip()
ggsave(file.path(outdir,'PIA_onset_repurpose_01.pdf'), gp_DR, width=2.1, height=4)
# save into a file 'PIA_onset_repurpose_01_index.pdf' under the folder 'pia_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
## one-column table
gp_index <- DR$index %>% mutate(Drug=str_replace_all(Drug,', ','\n')) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
grid::grid.draw(gp_index)
ggsave(file.path(outdir,'PIA_onset_repurpose_01_index.pdf'), gp_index, width=4, height=4)
Figure 8.9: Dot plot showing 4 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Dots are referenced in integers (see the next).
Figure 8.10: The information for referenced drugs and mechanisms of action. See referenced integers above.
Drug-target-disease information stored in the output file PIA_onset_repurpose_01.txt
above can be explored below.