4 Genetics-led target prioritisation

4.1 Input data

# read GWAS summary data for asthma (European or British populations)
GWAScatalog <- oRDS("GWAScatalog", placeholder=placeholder)
input_data_asthma <- GWAScatalog %>% filter(pvalue<5e-8) %>% filter(str_detect(EF_name,"asthma")) %>% filter(str_detect(initial_sample_description,"European|British")) %>% transmute(trait=EF_name, snp=name, pvalue)

# ls_traits
ls_traits <- list()
## asthma (irrespective of onset ages)
ls_traits[[1]] <- input_data_asthma %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='asthma', snp, pvalue)
## adult onset asthma
ls_traits[[2]] <- input_data_asthma %>% filter(str_detect(trait,'adult onset asthma')) %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='adult onset asthma', snp, pvalue)
## childhood onset asthma
ls_traits[[3]] <- input_data_asthma %>% filter(str_detect(trait,'childhood onset asthma')) %>% arrange(pvalue) %>% distinct(snp, .keep_all=T) %>% transmute(trait='childhood onset asthma', snp, pvalue)
## assign with names
names(ls_traits) <- c("asthma","adult onset asthma","childhood onset asthma")

4.2 Parameters and built-in datasets

# LD.customised
LD.customised <- oRDS('GWAS_LD.EUR', placeholder=placeholder) %>% as.data.frame()

# GR.SNP
#getOption("timeout"): 60
options(timeout=120) # please increase the timeout if failed to download
GR.SNP <- oRDS("dbSNP_Common", placeholder=placeholder)

# GR.Gene
GR.Gene <- oRDS("UCSC_knownGene", placeholder=placeholder)
df_GR_gene <- GR.Gene %>% as.data.frame() %>% as_tibble()

# include.RGB (53)
include.RGB <- c("PCHiC_PMID27863249_Activated_total_CD4_T_cells","PCHiC_PMID27863249_Endothelial_precursors","PCHiC_PMID27863249_Erythroblasts","PCHiC_PMID27863249_Fetal_thymus","PCHiC_PMID27863249_Macrophages_M0","PCHiC_PMID27863249_Macrophages_M1","PCHiC_PMID27863249_Macrophages_M2","PCHiC_PMID27863249_Megakaryocytes","PCHiC_PMID27863249_Monocytes","PCHiC_PMID27863249_Naive_B_cells","PCHiC_PMID27863249_Naive_CD4_T_cells","PCHiC_PMID27863249_Naive_CD8_T_cells","PCHiC_PMID27863249_Neutrophils","PCHiC_PMID27863249_Nonactivated_total_CD4_T_cells","PCHiC_PMID27863249_Total_B_cells","PCHiC_PMID27863249_Total_CD4_T_cells","PCHiC_PMID27863249_Total_CD8_T_cells", "PCHiC_PMID31501517_AdrenalGland","PCHiC_PMID31501517_Aorta","PCHiC_PMID31501517_Bladder","PCHiC_PMID31501517_Cardiomyocytes","PCHiC_PMID31501517_DorsolateralPrefrontalCortex","PCHiC_PMID31501517_Esophagus","PCHiC_PMID31501517_Fat","PCHiC_PMID31501517_Fibroblast","PCHiC_PMID31501517_Gastric","PCHiC_PMID31501517_H1","PCHiC_PMID31501517_H1MesenchymalStemCell","PCHiC_PMID31501517_H1MesendodermStemCell","PCHiC_PMID31501517_H1NeuralProgenitorCell","PCHiC_PMID31501517_Hippocampus","PCHiC_PMID31501517_LCL","PCHiC_PMID31501517_LeftHeartVentricle","PCHiC_PMID31501517_Liver","PCHiC_PMID31501517_Lung","PCHiC_PMID31501517_Ovary","PCHiC_PMID31501517_Pancreas","PCHiC_PMID31501517_Psoas","PCHiC_PMID31501517_RightHeartVentricle","PCHiC_PMID31501517_RightHeatAtrium","PCHiC_PMID31501517_SigmoidColon","PCHiC_PMID31501517_SmallBowel","PCHiC_PMID31501517_Spleen","PCHiC_PMID31501517_Thymus","PCHiC_PMID31501517_Trophoblast", "PCHiC_PMID31367015_astrocytes","PCHiC_PMID31367015_excitatory","PCHiC_PMID31367015_hippocampal","PCHiC_PMID31367015_motor", "PCHiC_PMID31253982_islet","PCHiC_PMID29955040_CMhESC", "PCHiC_PMID25938943_CD34","PCHiC_PMID25938943_GM12878")

# include.QTL (114)
include.QTL <- c("eQTL_eQTLGen", "pQTL_Plasma")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNg","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_IFNgSalmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_Salmonella","eQTL_eQTLCatalogue_Alasoo_2018_macrophage_naive","eQTL_eQTLCatalogue_BLUEPRINT_Tcell","eQTL_eQTLCatalogue_BLUEPRINT_monocyte","eQTL_eQTLCatalogue_BLUEPRINT_neutrophil","eQTL_eQTLCatalogue_BrainSeq_brain","eQTL_eQTLCatalogue_CEDAR_microarray_Bcell_CD19","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD4","eQTL_eQTLCatalogue_CEDAR_microarray_Tcell_CD8","eQTL_eQTLCatalogue_CEDAR_microarray_ileum","eQTL_eQTLCatalogue_CEDAR_microarray_monocyte_CD14","eQTL_eQTLCatalogue_CEDAR_microarray_neutrophil_CD15","eQTL_eQTLCatalogue_CEDAR_microarray_platelet","eQTL_eQTLCatalogue_CEDAR_microarray_rectum","eQTL_eQTLCatalogue_CEDAR_microarray_transverse_colon","eQTL_eQTLCatalogue_FUSION_adipose_naive","eQTL_eQTLCatalogue_FUSION_muscle_naive","eQTL_eQTLCatalogue_Fairfax_2012_microarray_Bcell_CD19","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_IFN24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS2","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_LPS24","eQTL_eQTLCatalogue_Fairfax_2014_microarray_monocyte_naive","eQTL_eQTLCatalogue_GENCORD_LCL","eQTL_eQTLCatalogue_GENCORD_Tcell","eQTL_eQTLCatalogue_GENCORD_fibroblast","eQTL_eQTLCatalogue_GEUVADIS_LCL")
include.QTL <- c(include.QTL,"eQTL_eQTLCatalogue_GTEx_V8_Adipose_Subcutaneous","eQTL_eQTLCatalogue_GTEx_V8_Adipose_Visceral_Omentum","eQTL_eQTLCatalogue_GTEx_V8_Adrenal_Gland","eQTL_eQTLCatalogue_GTEx_V8_Artery_Aorta","eQTL_eQTLCatalogue_GTEx_V8_Artery_Coronary","eQTL_eQTLCatalogue_GTEx_V8_Artery_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Brain_Amygdala","eQTL_eQTLCatalogue_GTEx_V8_Brain_Anterior_cingulate_cortex_BA24","eQTL_eQTLCatalogue_GTEx_V8_Brain_Caudate_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellar_Hemisphere","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cerebellum","eQTL_eQTLCatalogue_GTEx_V8_Brain_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Brain_Frontal_Cortex_BA9","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hippocampus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Hypothalamus","eQTL_eQTLCatalogue_GTEx_V8_Brain_Nucleus_accumbens_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Putamen_basal_ganglia","eQTL_eQTLCatalogue_GTEx_V8_Brain_Spinal_cord_cervical_c1","eQTL_eQTLCatalogue_GTEx_V8_Brain_Substantia_nigra","eQTL_eQTLCatalogue_GTEx_V8_Breast_Mammary_Tissue","eQTL_eQTLCatalogue_GTEx_V8_Cells_Cultured_fibroblasts","eQTL_eQTLCatalogue_GTEx_V8_Cells_EBVtransformed_lymphocytes","eQTL_eQTLCatalogue_GTEx_V8_Colon_Sigmoid","eQTL_eQTLCatalogue_GTEx_V8_Colon_Transverse","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Gastroesophageal_Junction","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Mucosa","eQTL_eQTLCatalogue_GTEx_V8_Esophagus_Muscularis","eQTL_eQTLCatalogue_GTEx_V8_Heart_Atrial_Appendage","eQTL_eQTLCatalogue_GTEx_V8_Heart_Left_Ventricle","eQTL_eQTLCatalogue_GTEx_V8_Kidney_Cortex","eQTL_eQTLCatalogue_GTEx_V8_Liver","eQTL_eQTLCatalogue_GTEx_V8_Lung","eQTL_eQTLCatalogue_GTEx_V8_Minor_Salivary_Gland","eQTL_eQTLCatalogue_GTEx_V8_Muscle_Skeletal","eQTL_eQTLCatalogue_GTEx_V8_Nerve_Tibial","eQTL_eQTLCatalogue_GTEx_V8_Ovary","eQTL_eQTLCatalogue_GTEx_V8_Pancreas","eQTL_eQTLCatalogue_GTEx_V8_Pituitary","eQTL_eQTLCatalogue_GTEx_V8_Prostate","eQTL_eQTLCatalogue_GTEx_V8_Skin_Not_Sun_Exposed_Suprapubic","eQTL_eQTLCatalogue_GTEx_V8_Skin_Sun_Exposed_Lower_leg","eQTL_eQTLCatalogue_GTEx_V8_Small_Intestine_Terminal_Ileum","eQTL_eQTLCatalogue_GTEx_V8_Spleen","eQTL_eQTLCatalogue_GTEx_V8_Stomach","eQTL_eQTLCatalogue_GTEx_V8_Testis","eQTL_eQTLCatalogue_GTEx_V8_Thyroid","eQTL_eQTLCatalogue_GTEx_V8_Uterus","eQTL_eQTLCatalogue_GTEx_V8_Vagina","eQTL_eQTLCatalogue_GTEx_V8_Whole_Blood")
include.QTL <- c(include.QTL, "eQTL_eQTLCatalogue_HipSci_iPSC","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD4","eQTL_eQTLCatalogue_Kasela_2017_microarray_Tcell_CD8","eQTL_eQTLCatalogue_Lepik_2017_blood","eQTL_eQTLCatalogue_Naranbhai_2015_microarray_neutrophil_CD16","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Listeria","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_Salmonella","eQTL_eQTLCatalogue_Nedelec_2016_macrophage_naive","eQTL_eQTLCatalogue_Quach_2016_monocyte_IAV","eQTL_eQTLCatalogue_Quach_2016_monocyte_LPS","eQTL_eQTLCatalogue_Quach_2016_monocyte_Pam3CSK4","eQTL_eQTLCatalogue_Quach_2016_monocyte_R848","eQTL_eQTLCatalogue_Quach_2016_monocyte_naive","eQTL_eQTLCatalogue_ROSMAP_brain_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Bcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD4_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_antiCD3CD28","eQTL_eQTLCatalogue_Schmiedel_2018_CD8_Tcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_NKcell_naive","eQTL_eQTLCatalogue_Schmiedel_2018_Tfh_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th117_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th17_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th1_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Th2_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_memory","eQTL_eQTLCatalogue_Schmiedel_2018_Treg_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_CD16_naive","eQTL_eQTLCatalogue_Schmiedel_2018_monocyte_naive","eQTL_eQTLCatalogue_Schwartzentruber_2018_sensory_neuron","eQTL_eQTLCatalogue_TwinsUK_LCL","eQTL_eQTLCatalogue_TwinsUK_blood","eQTL_eQTLCatalogue_TwinsUK_fat","eQTL_eQTLCatalogue_TwinsUK_skin","eQTL_eQTLCatalogue_van_de_Bunt_2015_pancreatic_islet")

# network.customised
network.customised <- oDefineNet(network="STRING_high", STRING.only=c("experimental_score","database_score"), placeholder=placeholder)

4.3 Do prioritisation

input_data <- ls_traits$`asthma` %>% select(snp,pvalue) %>% as.data.frame()

# prepare predictors (it takes 16 min or so)
ls_pNode_genomic <- oPierSNPsAdv(data=input_data, LD.customised=LD.customised, significance.threshold=5e-8, score.cap=100, distance.max=20000, decay.kernel="constant", GR.SNP=GR.SNP, GR.Gene=GR.Gene, include.QTL=include.QTL, include.RGB=include.RGB, network.customised=network.customised, placeholder=placeholder, verbose.details=F, verbose=F, parallel=F)

# save into a file 'PIA.ls_pNode_genomic.RData'
save(list='ls_pNode_genomic', file='PIA.ls_pNode_genomic.RData')

# do prioritisation
ls_pNode <- Filter(Negate(is.null), ls_pNode_genomic)
dTarget <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy="orderStatistic", keep=F, placeholder=placeholder)

# write into a file 'PIA_priority.txt.gz' under the folder 'pia_showcase'
dTarget$priority %>% select(name,rating,rank,seed,nGene,eGene,cGene,description) %>% write_delim(file.path(outdir,'PIA_priority.txt.gz'), delim='\t')

# save into a file 'PIA.dTarget.RData' under the folder 'pia_showcase'
save(list='dTarget', file=file.path(outdir,'PIA.dTarget.RData'))

4.4 Prioritised target genes

Target genes stored in the output file PIA_priority.txt.gz above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating).

4.5 Manhattan plot

# top 1 per chromosome
dT_tmp <- dTarget
dT_tmp$priority <- dTarget$priority %>% top_frac(0.5,rating)
top.label.query <- dT_tmp$priority %>% inner_join(df_GR_gene, by=c('name'='Symbol')) %>% group_by(seqnames) %>% top_n(1,rating) %>% pull(name)
label.query.only <- F
gp_manhattan_chr <- oPierManhattan(dT_tmp, color=c("darkred", "steelblue4"), point.size=0.2, top=100000, top.label.type="text", top.label.size=2, top.label.col="black", top.label.query=top.label.query, label.query.only=label.query.only, y.lab="Priority index", signature=F, placeholder=placeholder)
# saved into a file 'PIA_manhattan_chr.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_manhattan_chr.pdf'), gp_manhattan_chr, width=8, height=3)

Figure 4.1: Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top prioritised gene named per chromosome (x-axis).

Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top prioritised gene named per chromosome (x-axis).

5 Benchmarking

# GSP (phase 2 or above drug targets for asthma)
# GSN (simulated negative targets)
ig.EF.ChEMBL <- oRDS('ig.EF.ChEMBL', placeholder=placeholder)
ind <- str_detect(V(ig.EF.ChEMBL)$name,'asthma') %>% which()
GSP <- V(ig.EF.ChEMBL)$GSP[[ind]]
GSN <- V(ig.EF.ChEMBL)$GSN[[ind]]

5.1 Methods combining predictors

vec_methods <- c("fishers","orderStatistic","max","sum")
names(vec_methods) <- vec_methods
ls_dT_methods <- pbapply::pblapply(vec_methods, function(x){
    if(x %in% c("max","sum","harmonic")){
        dT <- oPierMatrix(ls_pNode, displayBy="score", aggregateBy=x, keep=F, placeholder=placeholder, verbose=F)
    }else if(x %in% c("fishers","logistic","Ztransform","orderStatistic")){
        dT <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy=x, keep=F, placeholder=placeholder, verbose=F)
    }
})
ls_pPerf <- lapply(ls_dT_methods, function(x){
    if(is(x,"pNode")){
        prediction <- x$priority %>% select(name,priority)
    }else if(is(x,"dTarget")){
        prediction <- x$priority %>% select(name,rating)
    }
    pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})
# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_methods <- gp + coord_fixed()

5.2 Naive prediction

ChEMBL_v30 <- oRDS("ChEMBL_v30", placeholder=placeholder)

# naive prediction: by the frequency of approved drugs targeting a gene
data_naive <- ChEMBL_v30 %>% filter(target_number<=5, phase>=4) %>% select(chembl_id_drug,Symbol) %>% distinct() %>% count(Symbol) %>% arrange(-n) %>% as.data.frame()

# pNode_naive
pNode_naive <- oPierGenes(data_naive, network.customised=network.customised, restart=1, placeholder=placeholder)

5.3 Open Targets Genetics

# From Open Targets to download: ASM_MONDO_0004979-associated-diseases.tsv
opentargets_file <- list.files(path=outdir, pattern=str_c("^ASM_.*.tsv"), full.names=T)
df <- read_delim(opentargets_file, delim='\t', col_types=cols_only(symbol='c',overallAssociationScore='d',geneticAssociations='d',somaticMutations='d',drugs='d',pathwaysSystemsBiology='d',textMining='d',rnaExpression='d',animalModels='d',targetName='c'))
vec <- sapply(1:nrow(df), function(i){
    x <- df[i,c(3,4,6:9)] %>% as.data.frame()
    x[is.na(x)] <- 0
    sum(x / rank(-x,ties.method="min")^2)
})
df <- df %>% mutate(opentarget=vec)

## data_nest
data_nest <- df %>% select(-targetName) %>% pivot_longer(-symbol, names_to='evidence', values_to='score') %>% filter(!is.na(score)) %>% nest(data=-evidence)

## ls_pNode_opentarget
ls_pNode_opentarget <- data_nest %>% mutate(pNode=map(data, function(x){
    oPierGenes(x, network.customised=network.customised, restart=1, placeholder=placeholder, verbose=F)
})) %>% select(-data) %>% deframe()
## do evaluation
ls_pNode_opentarget_selected <- ls_pNode_opentarget[c("geneticAssociations","somaticMutations","pathwaysSystemsBiology" ,"textMining","animalModels","rnaExpression","opentarget")[c(1)]]

5.4 ROC plot

# do evaluation
ls_dT_selected <- c(ls_dT_methods, list(`Naive`=pNode_naive), ls_pNode_opentarget_selected)
ls_pPerf <- lapply(ls_dT_selected, function(x){
    if (is(x,"pNode")){
        prediction <- x$priority %>% select(name,priority)
    }else if(is(x,"dTarget")){
        prediction <- x$priority %>% select(name,rating)
    }
    pPerf <- oClassifyPerf(prediction=prediction, GSP=GSP, GSN=GSN)
})

# do plotting
gp <- oClassifyComp(ls_pPerf, displayBy="ROC", type="curve", facet=F, sort=T)
gp_auc <- gp + coord_fixed()
# saved into a file 'PIA_auc.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_auc.pdf'), gp_auc, width=7, height=4)

# only for PIA (ROC + PR)
pPerf_ROC <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="ROC", highlight=T)
pPerf_PR <- oClassifyPerf(dTarget$priority %>% select(name,rating), GSP=GSP, GSN=GSN, plot="PR", highlight=T)
gp_both <- list(pPerf_ROC$gp+coord_fixed(), pPerf_PR$gp+coord_fixed()) %>% wrap_plots(ncol=2, guides='collect')
# saved into a file 'PIA_auc_pr.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_auc_pr.pdf'), gp_both, width=6, height=4)

Figure 5.1: AUC plot. FN, false negatives; FP, false positives; TN, true negatives; TP: true positives. The performance is compared with different rating schemes: ‘orderStatistic’ using an order statistic meta-analysis method to combine dataset-specific predictors, ‘fishers’ using a Fisher’s combined method, two conventional ones [‘sum’ (additively summing up across predictors) and ‘max’ (taking the maximum across predictors)], ‘Naive’ for naive rating (the frequency of being therapeutically targeted by existing licensed/approved drugs, which is motivated by drug repurposing), and ‘geneticAssociation’ for Open Targets - Genetics.

AUC plot. FN, false negatives; FP, false positives; TN, true negatives; TP: true positives. The performance is compared with different rating schemes: 'orderStatistic' using an order statistic meta-analysis method to combine dataset-specific predictors, 'fishers' using a Fisher's combined method, two conventional ones ['sum' (additively summing up across predictors) and 'max' (taking the maximum across predictors)], 'Naive' for naive rating (the frequency of being therapeutically targeted by existing licensed/approved drugs, which is motivated by drug repurposing), and 'geneticAssociation' for Open Targets - Genetics.

6 Disease relevance

# KEGG: Asthma - Homo sapiens (human)
# https://www.genome.jp/pathway/hsa05310

ig.KEGG.individual <- oRDS('ig.KEGG.individual', placeholder=placeholder)
g <- ig.KEGG.individual %>% filter(pathway=='Asthma') %>% deframe()
g_path <- g[[1]]

# Enrichment analysis of the top 1% prioritised genes in terms of asthma known pathway genes
set <- list()
set$known <- V(g_path)$name
data <- dTarget$priority %>% top_frac(0.01,rating) %>% pull(name)
background <- dTarget$priority %>% pull(name)
eset_kegg <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)

eset_kegg %>% oSEAextract() %>% transmute(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()

7 Leading prioritisation analysis

7.1 LPA analysis

ChEMBL_v30 <- oRDS("ChEMBL_v30", placeholder=placeholder)

# df_target_known
df_target_known <- ChEMBL_v30 %>% filter(efo_term=='asthma') %>% filter(target_number<=5, phase>=2) %>% select(Symbol,pref_name_drug,phase,mechanism_of_action,efo_term) %>% arrange(-phase,Symbol) %>% distinct()

# phase3above: phase-III or above targes for Asthma
phase3above <- df_target_known %>% filter(phase>=3) %>% pull(Symbol) %>% unique()
# phase2above: phase-II or above targes for Asthma
phase2above <- df_target_known %>% filter(phase>=2) %>% pull(Symbol) %>% unique()

# do leading prioritisation analysis (LPA)
customised.genesets <- list(phase3above=phase3above, phase2above=phase2above)
df <- dTarget$priority %>% top_frac(0.1,rating) %>% transmute(priority=rating, rank=rank)
set.seed(825)
eLPA <- oPierGSEA(df, customised.genesets=customised.genesets, size.range=c(15,5000), nperm=50000)

# df_summary_leading
## df_summary
df_summary <- eLPA$df_summary %>% mutate(frac=nLead/nAnno) %>% filter(frac<=1) %>% arrange(tolower(setID))
## df_leading
df_leading <- eLPA$leading[df_summary %>% pull(setID)] %>% enframe() %>% transmute(setID=name,member=value) %>% mutate(member_rank=map_chr(member, function(x) str_c(names(x),' (',x,')', collapse=', ')))
## df_summary_leading
df_summary_leading <- df_summary %>% inner_join(df_leading, by='setID')

7.2 Leading plot

top <- 'phase2above'

# leading plot
gp_leading <- oGSEAdotplot(eLPA, top=top, title='setID', subtitle='none', compact=T, colormap="spectral", zlim=c(0,5), peak.color='black', clab='Priority rating', signature=FALSE)

# leading plot (core)
leading.query <- dTarget$priority %>% top_frac(1,rating) %>% pull(name)
gp_leading_core <- oGSEAdotplot(eLPA, top=top, x.scale="normal", leading=T, leading.edge.only=T, leading.query.only=T, leading.query=leading.query, leading.size=2, leading.color='steelblue4', leading.alpha=0.95, colormap="spectral", zlim=c(0,5), peak.color='black', clab='Priority rating', signature=FALSE)

# gp_both
gp_both <- list(gp_leading_core, gp_leading) %>% wrap_plots(ncol=2, widths=c(2,1), guides=c('keep','collect')[2])

# saved into a file 'PIA_leading.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_leading.pdf'), gp_both, width=9, height=4.5)

# df_target_recovered
df <- df_summary_leading %>% filter(setID=='phase2above') %>% select(member) %>% deframe() %>% unlist() %>% names() %>% as_tibble() %>% set_names('Symbol') %>% inner_join(dTarget$priority %>% transmute(Symbol=name,rank,rating))

df_target_recovered <- ChEMBL_v30 %>% filter(efo_term=='asthma') %>% filter(target_number<=5) %>% filter(phase>=2) %>% inner_join(df, by='Symbol') %>% transmute(efo_term, phase, pref_name_drug=tolower(pref_name_drug), mechanism_of_action, GeneID, Symbol, rank, rating) %>% arrange(phase,pref_name_drug,rank)

# write into a file 'PIA_leading_recovered.txt' under the folder 'pia_showcase'
df_target_recovered %>% write_delim(file.path(outdir,'PIA_leading_recovered.txt'), delim='\t')

# HEB
library(ggraph)
df_xyw <- df_target_recovered %>% mutate(label=ifelse(phase==2,'phase-II', ifelse(phase==3,'phase-III','approved'))) %>% transmute(ynode=Symbol, xnode=str_c(pref_name_drug,' (',label,')'), weight=phase) %>% arrange(weight,ynode) %>% distinct()
big <- df_xyw %>% pivot_wider(names_from=xnode, values_from=weight, values_fill=list(weight=0)) %>% arrange(ynode) %>% column_to_rownames("ynode") %>% oBicreate()
V(big)$community <- V(big)$type
E(big)$weight <- 1
V(big)$size <- igraph::degree(big)
gp_heb <- oHEB(big, edge.alpha=0.5, edge.width=0.6, leave.label.expansion=1.01, edge.tension=0.7)
gp_heb <- gp_heb + guides(color='none', size='none') + scale_edge_colour_gradientn(limits=c(1,4), colors=oColormap('spectral')(64), guide=guide_edge_colorbar("Phase",barwidth=unit(8,'pt')))
## saved into a file 'PIA_leading_recovered_heb.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_leading_recovered_heb.pdf'), gp_heb, width=9.5, height=9.5)

Figure 7.1: Leading prioritisation plot for asthma phase-II or above therapeutic targets. Leading prioritisation analysis was performed to quantify the degree to which asthma phase-II or above therapeutic targets are enriched at the leading prioritisation. The leading prioritisation is defined as the core subset of prioritised target genes accounting for the enrichment signal, which can be visualised as the left-most region of the peak in running enrichment plot (middle panel). Asthma phase-II or above therapeutic targets recovered at the leading prioritisation (zoomed in left panel) are named and indicated in vertical lines (also color-coded by priority rating).

Leading prioritisation plot for asthma phase-II or above therapeutic targets. Leading prioritisation analysis was performed to quantify the degree to which asthma phase-II or above therapeutic targets are enriched at the leading prioritisation. The leading prioritisation is defined as the core subset of prioritised target genes accounting for the enrichment signal, which can be visualised as the left-most region of the peak in running enrichment plot (middle panel). Asthma phase-II or above therapeutic targets recovered at the leading prioritisation (zoomed in left panel) are named and indicated in vertical lines (also color-coded by priority rating).

Target genes stored in the output file PIA_leading_recovered.txt above can be explored below. Notes, genes are ranked by priority rating (ranged 0-5; see the column rating).

Figure 7.2: Hierarchical edge bundling visualises leading prioritised genes (in pink) that are already targeted by existing asthma phase-II or above drugs (in cyan). Nodes are sized by degree (the number of neighbors).

Hierarchical edge bundling visualises leading prioritised genes (in pink) that are already targeted by existing asthma phase-II or above drugs (in cyan). Nodes are sized by degree (the number of neighbors).

7.3 Leading genes recovered enriched for approved drug targets

# per target, keep the maximum phased drugs only (along with disease indications)
ChEMBL_v30_DTT <- oRDS("ChEMBL_v30_DTT", placeholder=placeholder)
ChEMBL_v30_DTT <- ChEMBL_v30_DTT %>% 
    filter(!(efo_term %in% c('cancer','neoplasm','immune system disease'))) %>% 
    filter(!str_detect(efo_term,'neoplasm')) %>% 
    filter(!str_detect(efo_term,'cancer')) %>%
    mutate(efo_term=str_replace_all(efo_term,'\\{http.*','')) %>%
    mutate(efo_term=str_to_title(efo_term)) %>%
    mutate(pref_name_drug=str_replace_all(pref_name_drug, ',', '')) %>%
    mutate(pref_name_drug=str_to_lower(pref_name_drug)) %>%
    mutate(pref_name_drug=str_c(pref_name_drug,' [',mechanism_of_action,']'))

# dtt: excluding 'asthma' but all approved
dtt <- ChEMBL_v30_DTT %>% filter(efo_term!='asthma', phase>=4) %>% mutate(target_number=1) %>% as.data.frame()

# vec_recovered_repurpose
vec_recovered_repurpose <- dtt %>% semi_join(df_target_recovered %>% distinct(Symbol), by='Symbol') %>% pull(Symbol) %>% unique()

# Enrichment analysis of Asthma phase-II or above drug targets recovered on leading prioritisation analysis in terms of all approved drug targets
set <- list()
set$known <- dtt$Symbol %>% unique()
data <- df_target_recovered %>% pull(Symbol) %>% unique()
background <- dTarget$priority %>% pull(name)
eset_known <- oSEA(data, set, background=background, size.range=c(10,20000), min.overlap=1)

eset_known %>% oSEAextract() %>% select(pvalue,or,CIl,CIu,nA,nO,overlap) %>% DT::datatable()

8 Prioritisation for different subtypes

# it takes 47 min
ls_dTarget <- pbapply::pblapply(seq(length(ls_traits)), function(i){
    message(sprintf("%d ('%s') (%s) ...", i, names(ls_traits)[i], as.character(Sys.time())), appendLF=TRUE)
    data <- ls_traits[[i]] %>% select(snp, pvalue) %>% as.data.frame()
    ## first, genomic predictors
    ls_pN_genomic <- oPierSNPsAdv(data, LD.customised=LD.customised, significance.threshold=5e-8, score.cap=100, distance.max=20000, decay.kernel="constant", GR.SNP=GR.SNP, GR.Gene=GR.Gene, include.QTL=include.QTL, include.RGB=include.RGB, network.customised=network.customised, placeholder=placeholder, verbose.details=F, verbose=F)
    ## Prioritisation
    ls_pN <- base::Filter(base::Negate(is.null), ls_pN_genomic)
    dT <- oPierMatrix(ls_pN, displayBy="pvalue", aggregateBy="orderStatistic", keep=F, placeholder=placeholder)
})
names(ls_dTarget) <- names(ls_traits)

# save into a file 'PIA.ls_dTarget.RData' under the folder 'pia_showcase'
save(list='ls_dTarget', file=file.path(outdir,'PIA.ls_dTarget.RData'))

8.1 Manhattan plot

ls_dTarget_selected <- ls_dTarget[c('adult onset asthma', 'childhood onset asthma')]
ls_gp_manhattan <- pbapply::pblapply(seq(length(ls_dTarget_selected)), function(i){
    dT_tmp <- ls_dTarget_selected[[i]]
    dT_tmp$priority <- dT_tmp$priority %>% top_frac(0.45,rating)
    top.label.query <- dT_tmp$priority %>% inner_join(df_GR_gene, by=c('name'='Symbol')) %>% group_by(seqnames) %>% top_n(1,rating) %>% pull(name)
    label.query.only <- F
    y.lab <- str_c("Priority index\n(", names(ls_dTarget_selected)[i], ")")
    gp_manhattan_chr <- oPierManhattan(dT_tmp, color=c("darkred", "steelblue4"), point.size=0.2, top=100000, top.label.type="text", top.label.size=2, top.label.col="black", top.label.query=top.label.query, label.query.only=label.query.only, y.lab=y.lab, signature=F, placeholder=placeholder)
})

# saved into a file 'PIA_onset_manhattan_chr.pdf' under the folder 'pia_showcase'
gp_manhattan_combined <- ls_gp_manhattan %>% wrap_plots(ncol=1, guides='collect')
ggsave(file.path(outdir,'PIA_onset_manhattan_chr.pdf'), gp_manhattan_combined, width=8, height=5)

Figure 8.1: Manhattan plot illustrates priority rating (y-axis) for prioritised target genes along genomic locations, with the top prioritised gene named per chromosome (x-axis). Top: adult-onset asthma; Bottom: children-onset asthma.

8.2 Target hallmarks

# ls_vec
ls_vec <- lapply(ls_dTarget_selected, function(dT){
    dT$priority %>% top_frac(0.01,rating) %>% pull(name)
})

# sets
sets <- tibble(onto=c('MsigdbH')) %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))

# esad
esad <- oSEAadv(ls_vec, sets, size.range=c(10,1500), test="fisher", min.overlap=5)

# oSEAggraph
ig <- oRDS("ig.MsigdbH", placeholder=placeholder)
ig %>% oIG2TB('nodes') %>% tail() %>% print(width=Inf)
V(ig)$label <- V(ig)$name %>% str_replace_all('HALLMARK_','') %>% str_replace_all('_',' ') %>% str_to_sentence()
gp <- esad %>% oSEAextract() %>% filter(adjp<0.05) %>% oSEAggraph(ig, fixed=T, node.color="adjp", colormap="grey90-orange-darkred", zlim=c(0,15), node.size="zscore",slim=c(0,15),node.size.range=c(0.5,4.5),node.label.size=2.5, node.label.direction=c('none','leftright','topbottom')[2], layout=c('stress','dendrogram')[2], leave=F, edge.color='steelblue', edge.alpha=0.2, edge.width=0.5)

# saved into a file 'PIA_onset_hallmark.pdf' under the folder 'pia_showcase'
gp_hallmark <- list(gp$gp_template, gp) %>% wrap_plots(ncol=1, heights=c(1,1), guides='collect')
ggsave(file.path(outdir,'PIA_onset_hallmark.pdf'), gp_hallmark, width=8, height=7.5)

Figure 8.2: Circular overview of molecular hallmarks (top panel), with enrichments shown for adult-onset asthma (bottom-left panel) and childhood-onset asthma (bottom-right panel) where nodes are sized by the enrichment z-score and colored by the enrichment significance (FDR).

$Circular overview of molecular hallmarks (top panel), with enrichments shown for adult-onset asthma (bottom-left panel) and childhood-onset asthma (bottom-right panel) where nodes are sized by the enrichment z-score and colored by the enrichment significance (FDR).$

8.3 Identification of pathway crosstalks

ig.KEGG.category <- oRDS('ig.KEGG.category', placeholder=placeholder)
ig <- ig.KEGG.category %>% filter(category %in% c("Environmental Process","Organismal System")) %>% deframe() %>% oCombineNet()
ig2 <- oNetInduce(ig, nodes_query=V(ig)$name, largest.comp=T) %>% as.undirected()
ls_subg_perm <- pbapply::pblapply(seq(length(ls_dTarget_selected)), function(i){
    message(sprintf("%d ('%s') (%s) ...", i, names(ls_dTarget_selected)[i], as.character(Sys.time())), appendLF=TRUE)
    #subg <- oPierSubnet(ls_dTarget_selected[[i]], network.customised=ig2, subnet.size=50)
    subg_perm <- oPierSubnet(ls_dTarget_selected[[i]], network.customised=ig2, subnet.size=50, test.permutation=T, num.permutation=10, respect="degree", aggregateBy="fishers")
})
names(ls_subg_perm) <- names(ls_dTarget_selected)

# save into a file 'PIA.ls_subg_perm.RData' under the folder 'pia_showcase'
save(list='ls_subg_perm', file=file.path(outdir,'PIA.ls_subg_perm.RData'))

# extract pathway crosstalk genes
ls_df_subg <- pbapply::pblapply(seq(length(ls_subg_perm)), function(i){
    message(sprintf("%d ('%s') (%s) ...", i, names(ls_subg_perm)[i], as.character(Sys.time())), appendLF=TRUE)
    dT <- ls_dTarget[[i]]
    subg <- ls_subg_perm[[i]]
    df_subg <- dT$priority %>% as_tibble() %>% mutate(subtype=names(ls_subg_perm)[i]) %>% select(subtype,name,rating,rank,seed,nGene,eGene,cGene,description)  %>% semi_join(tibble(name=V(subg)$name), by='name')
})
df_subg <- do.call(rbind, ls_df_subg)

# write into a file 'PIA_onset_subg.txt' under the folder 'pia_showcase'
df_subg %>% write_delim(file.path(outdir,'PIA_onset_subg.txt'), delim='\t')

Pathway crosstalk genes stored in the output file PIA_onset_subg.txt above can be explored below.

8.4 Pathway crosstalk visualisation

ls_gp_subg <- pbapply::pblapply(seq(length(ls_subg_perm)), function(i){
    ig <- ls_subg_perm[[i]]
    dTarget <- ls_dTarget_selected[[i]]
    
    # degree
    V(ig)$degree <- igraph::degree(ig)
    # layout: xcoord + ycoord
    ig <- ig %>% oLayout("graphlayouts.layout_with_stress")
    # label
    V(ig)$label <- tibble(name=V(ig)$name) %>% inner_join(dTarget$priority, by='name') %>% mutate(label=str_c(name)) %>% pull(label)
    # gp_subg
    gp_subg <- ig %>% oGGnetwork(node.label='label', node.label.size=2.5, node.label.color='black', node.label.alpha=1, node.label.padding=0.1, node.label.arrow=0, node.label.force=0.05, node.xcoord="xcoord", node.ycoord="ycoord", node.color="priority", node.color.title='Priority rating', colormap="spectral", zlim=c(0,5), node.color.alpha=0.95, node.size='degree', node.size.title='degree', node.size.range=c(1.5,4.5), edge.color="steelblue4", edge.color.alpha=0.2,edge.size=0.3,edge.curve=0.05,edge.arrow.gap=0.01)
    gp_subg + guides(size="none")
})
names(ls_gp_subg) <- names(ls_subg_perm)

# saved into a file 'PIA_onset_subg.pdf' under the folder 'pia_showcase'
gp_subg <- ls_gp_subg %>% wrap_plots(ncol=1, heights=c(1,1), guides='collect')
ggsave(file.path(outdir,'PIA_onset_subg.pdf'), gp_subg, width=7, height=8)

Figure 8.3: Visualisations of pathway crosstalks are illustrated separately for adult-onset asthma (top panel) and childhood-onset asthma (bottom panel), each identified by integrating the target priority rating information with pathway-derived gene interactions (merged from KEGG pathways).

Visualisations of pathway crosstalks are illustrated separately for adult-onset asthma (top panel) and childhood-onset asthma (bottom panel), each identified by integrating the target priority rating information with pathway-derived gene interactions (merged from KEGG pathways).

8.5 Heatmap

mat_priority <- df_subg %>% select(trait, name, rating) %>% pivot_wider(names_from=trait, values_from=rating)
mat_priority <- mat_priority %>% mutate(code=str_c(0+!is.na(`adult onset asthma`), 0+!is.na(`childhood onset asthma`))) %>% arrange(desc(code),name)

# write into a file 'PIA_onset_heatmap.txt' under the folder 'pic2_showcase'
mat_priority %>% write_delim(file.path(outdir,'PIA_onset_heatmap.txt'), delim='\t')

# heatmap
data <- mat_priority %>% select(-code) %>% column_to_rownames('name')
gp_heatmap <- oHeatmap(data, zlim=c(0,5), colormap="spectral", x.rotate=45, x.text.size=7, y.text.size=7, legend.title="Priority rating", legend.text.size=6, legend.title.size=8, shape=19, size=2.5)
# saved into a file 'PIA_onset_heatmap.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_onset_heatmap.pdf'), gp_heatmap, width=2, height=8)

Heatmap in the output file PIA_onset_heatmap.txt above can be explored below.

Figure 8.4: Heatmap illustrates pathway crosstalk genes for adult-onset asthma (the 1st column) and childhood-onset asthma (the 2nd column), with genes divided into three groups: shared, specific to adult-onset asthma, and specific to childhood-onset asthma.

Heatmap illustrates pathway crosstalk genes for adult-onset asthma (the 1st column) and childhood-onset asthma (the 2nd column), with genes divided into three groups: shared, specific to adult-onset asthma, and specific to childhood-onset asthma.

8.6 KEGG characterisation for subtypes

# enrichment analysis
## ls_code
ls_code <- mat_priority %>% select(name,code) %>% nest(data=-code) %>% mutate(gene=map(data,~pull(.x,name))) %>% select(-data) %>% deframe()
## background
background <- mat_priority %>% pull(name)
## sets
sets <- tibble(onto='KEGG') %>% mutate(set=map(onto,~oRDS(str_c("org.Hs.eg",.x),placeholder=placeholder)))
## esad
esad <- oSEAadv(ls_code, sets, background=NULL, size.range=c(10,500), min.overlap=10)

# balloon plot
## top two enrichments per module
gp <- esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% oSEAballoon(top=5, adjp.cutoff=0.05, zlim=NULL, slim=NULL, size.range=c(1,6), shape=19, colormap="grey90-orange-darkred")
gp_kegg <- gp + theme(axis.text.y=element_text(size=6),axis.text.x=element_text(size=6,angle=0),strip.text.y=element_text(size=6,angle=0))

# save into a file 'PIA_onset_kegg.pdf' under the folder 'pipe_showcase'
ggsave(file.path(outdir,'PIA_onset_kegg.pdf'), gp_kegg, width=4.5, height=2)

# write into a file 'PIA_onset_kegg.txt' under the folder 'pipe_showcase'
esad %>% oSEAextract() %>% filter(adjp<1e-2, namespace %in% 'Environmental Process') %>% group_by(group) %>% top_n(2,-adjp) %>% arrange(group,adjp) %>% write_delim(file.path(outdir,'PIA_onset_kegg.txt'), delim='\t')

Enriched pathways stored in the output file PIA_onset_kegg.txt above can be explored below.

Figure 8.5: KEGG pathway enrichment analysis for genes within each of three groups coded as ‘11’ (shared), ‘10’ (specific to adult-onset asthma), and ‘01’ (specific to childhood-onset asthma). Enrichment analysis was based on one-sided Fisher’s exact test.

KEGG pathway enrichment analysis for genes within each of three groups coded as '11' (shared), '10' (specific to adult-onset asthma), and '01' (specific to childhood-onset asthma). Enrichment analysis was based on one-sided Fisher’s exact test.

8.7 Repurposing enrichment analysis

# ontology annotation: two categories 'approved' and 'phased' (phases 2 and 3)
set <- ChEMBL_v30_DTT %>% filter(phase>=2) %>% 
  select(Symbol,phase) %>% arrange(-phase) %>% 
  distinct(Symbol, .keep_all=T) %>% filter(phase>=2) %>%
  mutate(phase=ifelse(phase==4,'approved','phased')) %>% 
  nest(data=-phase) %>% mutate(data=map(data,~pull(.x))) %>% 
  deframe()
  
# enrichment analysis
ls_vec <- lapply(ls_dTarget_selected, function(dT){
    dT$priority %>% pull(name)
})
background <- unlist(ls_vec) %>% unique()
ls_res <- lapply(1:length(ls_code), function(i){
    eset <- oSEA(ls_code[[i]], set, background=background, size.range=c(10,20000), min.overlap=1)
    eset %>% oSEAextract() %>% mutate(group=names(ls_code)[i])
})
df_res <- do.call(rbind, ls_res)

# forest plot
df_res <- df_res %>% filter(name %in% c('approved','phased')) %>% mutate(adjp=p.adjust(pvalue,method="BH"))
gp_forest <- df_res %>% oSEAforest(adjp.cutoff=1, zlim=NULL, colormap='grey50-orange-darkred', legend.direction="horizontal", sortBy="or")

# save into a file 'PIA_onset_repurpose.pdf' under the folder 'pia_showcase'
ggsave(file.path(outdir,'PIA_onset_repurpose.pdf'), gp_forest, width=7, height=2)

# write into a file 'PIA_onset_repurpose.txt' under the folder 'pipe_showcase'
df_res %>% select(group,name,zscore,adjp,or,CIl,CIu,nO,overlap) %>% arrange(group,name,adjp) %>% write_delim(file.path(outdir,'PIA_onset_repurpose.txt'), delim='\t')

Enriched approved or phased drug target categories stored in the output file PIA_onset_repurpose.txt above can be explored below.

Figure 8.6: Forest plot of approved or phased drug target enrichments. Approved and phased drug targets are sourced from ChEMBL.

8.8 Repurposing analysis for shared genes

Drug repurposing dotplot for shared genes (coded 11)

# dtt
dtt <- ChEMBL_v30_DTT %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()

# vec_data
df_data <- tibble(Symbol=ls_code$`11`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)

# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=60, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)

# write into a file 'PIA_onset_repurpose_11.txt' under the folder 'pia_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'PIA_onset_repurpose_11.txt'), delim='\t')

# save into a file 'PIA_onset_repurpose_11.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right")
ggsave(file.path(outdir,'PIA_onset_repurpose_11.pdf'), gp_DR, width=6.5, height=3.5)

# save into a file 'PIA_onset_repurpose_11_index.pdf' under the folder 'pia_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
## two-column tables, each row only showing the first one if multiple drugs
df_index <- DR$index
tmp <- df_index %>% separate_rows(Drug, sep=', ')
n <- tmp %>% slice(floor(tmp %>% nrow()/2)) %>% pull(Drug_index)
df <- df_index %>% mutate(Drug=str_replace_all(Drug,', ','\n'))
m <- df %>% nrow()
gt1 <- df %>% slice(1:n) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
gt2 <- df %>% slice((n+1):m) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
ls_gt <- list(gt1, gt2)
gp_table <- ls_gt %>% gridExtra::marrangeGrob(ncol=2, nrow=1, as.table=FALSE)
ggsave(file.path(outdir,'PIA_onset_repurpose_11_index.pdf'), gp_table, width=6.5, height=5.5)

Figure 8.7: Dot plot showing 9 genes (y-axis) that are currently targeted by approved drugs in disease (x-axis). Dots are referenced in integers (see the next).

Dot plot showing 9 genes (y-axis) that are currently targeted by approved drugs in disease (x-axis). Dots are referenced in integers (see the next).

Figure 8.8: The information for referenced drugs and mechanisms of action. See referenced integers above.

$The information for referenced drugs and mechanisms of action. See referenced integers above.$

Drug-target-disease information stored in the output file PIA_onset_repurpose_11.txt above can be explored below.

8.9 Repurposing analysis for genes specific to childhood-onset asthma

Drug repurposing dotplot for childhood-onset-specific crosstalk genes (coded 01)

# dtt
dtt <- ChEMBL_v30_DTT %>% filter(phase>=4) %>% mutate(target_number=1) %>% as.data.frame()

# vec_data
df_data <- tibble(Symbol=ls_code$`01`)
vec_data <- dtt %>% select(Symbol,phase) %>% distinct() %>% arrange(-phase,Symbol) %>% semi_join(df_data, by='Symbol') %>% pull(Symbol)

# DR
DR <- oRepurpose(vec_data, phase.min=4, target.max=5, reorder="none", DTT=dtt, restricted=NULL, colormap="spectral.top", zlim=c(1,4), na.color='transparent', label.size=2.5, label.color="white", x.rotate=60, size=4, legend.title='Phase', x.text.size=6, y.text.size=6)

# write into a file 'PIA_onset_repurpose_01.txt' under the folder 'pia_showcase'
DR$df %>% select(Target,Disease,Drug_index,Drug) %>% arrange(Drug_index,Disease) %>% write_delim(file.path(outdir,'PIA_onset_repurpose_01.txt'), delim='\t')

# save into a file 'PIA_onset_repurpose_01.pdf' under the folder 'pipe_showcase'
gp_DR <- DR$gp + theme(legend.position="none") + scale_y_discrete(position="right",limits=rev)+ scale_x_discrete(position="top",limits=rev) + coord_flip()
ggsave(file.path(outdir,'PIA_onset_repurpose_01.pdf'), gp_DR, width=2.1, height=4)

# save into a file 'PIA_onset_repurpose_01_index.pdf' under the folder 'pia_showcase'
library(gridExtra)
tt <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"))
tt_right <- ttheme_default(base_size=6, padding=unit(c(2,2),"mm"), core=list(fg_params=list(hjust=1,x=0.98)), colhead=list(fg_params=list(hjust=1, x=0.98)))
## one-column table
gp_index <- DR$index %>% mutate(Drug=str_replace_all(Drug,', ','\n')) %>% gridExtra::tableGrob(rows=NULL,cols=c('Index','Drugs [mechanisms of action]'), theme=tt_right)
grid::grid.draw(gp_index)
ggsave(file.path(outdir,'PIA_onset_repurpose_01_index.pdf'), gp_index, width=4, height=4)

Figure 8.9: Dot plot showing 4 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Dots are referenced in integers (see the next).

Dot plot showing 4 genes (x-axis) that are currently targeted by approved drugs in disease (y-axis). Dots are referenced in integers (see the next).

Figure 8.10: The information for referenced drugs and mechanisms of action. See referenced integers above.

$The information for referenced drugs and mechanisms of action. See referenced integers above.$

Drug-target-disease information stored in the output file PIA_onset_repurpose_01.txt above can be explored below.

Priority Index for Asthma (PIA)

Hai Fang

Shanghai

2023-06-02

1 Overview

2 R and Package installation

3 Load packages and specify built-in data

4 Genetics-led target prioritisation

4.1 Input data

4.2 Parameters and built-in datasets

4.3 Do prioritisation

4.4 Prioritised target genes

4.5 Manhattan plot

5 Benchmarking

5.1 Methods combining predictors

5.2 Naive prediction

5.3 Open Targets Genetics

5.4 ROC plot

6 Disease relevance

7 Leading prioritisation analysis

7.1 LPA analysis

7.2 Leading plot

7.3 Leading genes recovered enriched for approved drug targets

8 Prioritisation for different subtypes

8.1 Manhattan plot

8.2 Target hallmarks

8.3 Identification of pathway crosstalks

8.4 Pathway crosstalk visualisation

8.5 Heatmap

8.6 KEGG characterisation for subtypes

8.7 Repurposing enrichment analysis

8.8 Repurposing analysis for shared genes

8.9 Repurposing analysis for genes specific to childhood-onset asthma

Priority Index for Asthma (PIA)

Hai Fang Shanghai

2023-06-02

1 Overview

2 R and Package installation

3 Load packages and specify built-in data

4 Genetics-led target prioritisation

4.1 Input data

4.2 Parameters and built-in datasets

4.3 Do prioritisation

4.4 Prioritised target genes

4.5 Manhattan plot

5 Benchmarking

5.1 Methods combining predictors

5.2 Naive prediction

5.3 Open Targets Genetics

5.4 ROC plot

6 Disease relevance

7 Leading prioritisation analysis

7.1 LPA analysis

7.2 Leading plot

7.3 Leading genes recovered enriched for approved drug targets

8 Prioritisation for different subtypes

8.1 Manhattan plot

8.2 Target hallmarks

8.3 Identification of pathway crosstalks

8.4 Pathway crosstalk visualisation

8.5 Heatmap

8.6 KEGG characterisation for subtypes

8.7 Repurposing enrichment analysis

8.8 Repurposing analysis for shared genes

8.9 Repurposing analysis for genes specific to childhood-onset asthma

Hai Fang

Shanghai