rm(list = ls())
library(ggplot2)
library(dplyr)
library(pander)
panderOptions('table.split.table', Inf)

1 Install

Install SilverStandardPerformance

devtools::install_github('liangyy/silver-standard-performance')
## Skipping install of 'SilverStandardPerformance' from a github remote, the SHA1 (acd65c56) has not changed since last install.
##   Use `force = TRUE` to force installation
library(SilverStandardPerformance)

2 Load silver standard

data("omim_based_silver_standard")
# show the silver standard object
omim_based_silver_standard$script_info
## [1] "phecode_to_omim_to_gene.sh"
omim_based_silver_standard$table %>% head %>% pander
gene HPO phecode EFO
ENSG00000133019 HP:0000028 751.12 NA
ENSG00000133019 HP:0000072 586.4 NA
ENSG00000133019 HP:0000126 595 NA
ENSG00000133019 HP:0000217 527.7 NA
ENSG00000133019 HP:0000767 756.21 NA
ENSG00000133019 HP:0000768 756.22 NA

3 Load example data set

Using the one composed for GTEx GWAS paper.

options(stringsAsFactors = F)
library(dplyr)
dat = read.table('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/fdr_power_specificity/companion_figure_finalized/summary_on_expression_cleanup/logistic-based-test.datamatrix.OMIM-LD-block-PrediXcan-MASH-EUR.tsv', header = T, sep = '\t')
gwas_catelog_to_phecode = read.csv('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/gold-standard/gwas-catalog-to-phecode.csv')
trait_to_gwas_catalog = read.delim2('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/gold-standard/trait-to-hpo-and-mim.txt', header = T, sep = '\t')

# obtain mapping between trait and phecode
map_trait_to_phecode = list()
for(i in 1 : nrow(trait_to_gwas_catalog)) {
  mapped_traits = trait_to_gwas_catalog$mapped_trait[i]
  if(is.na(mapped_traits)) {
    next
  }
  catalog_strings = unique(unlist(strsplit(mapped_traits, ';')))
  phecodes = gwas_catelog_to_phecode %>% filter(trait %in% catalog_strings) %>% pull(phecode)
  phecodes = unique(phecodes)
  phecodes = phecodes[phecodes != '-']
  if(length(phecodes) == 0) {
    next
  }
  map_trait_to_phecode[[length(map_trait_to_phecode) + 1]] = data.frame(trait = trait_to_gwas_catalog$trait[i], phecode = phecodes)
}
map_trait_to_phecode = do.call(rbind, map_trait_to_phecode)

# obtain mapping between trait and HPO
map_trait_to_hpo = list()
for(i in 1 : nrow(trait_to_gwas_catalog)) {
  hpo_str = trait_to_gwas_catalog$hpo[i]
  if(is.na(hpo_str)) {
    next
  }
  hpos = unique(unlist(strsplit(hpo_str, ';')))
  if(length(hpos) == 0) {
    next
  }
  map_trait_to_hpo[[length(map_trait_to_hpo) + 1]] = data.frame(trait = trait_to_gwas_catalog$trait[i], HPO = paste0('HP:', stringr::str_pad(hpos, 7, pad = "0")))
}
map_trait_to_hpo = do.call(rbind, map_trait_to_hpo)


score_table = dat %>% select(trait, gene, predixcan_mashr_eur_score, enloc_score) %>% distinct()
map_table = map_trait_to_phecode %>% distinct()
map_table2 = map_trait_to_hpo %>% distinct()

3.1 Score table

score_table %>% head %>% pander
trait gene predixcan_mashr_eur_score enloc_score
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000000457 2.667 0
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000000460 13.01 0
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000007908 17.75 0
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000075945 4.271 0
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000117475 3.957 0
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt ENSG00000117477 40.43 0

3.2 Map table

map_table %>% head %>% pander(caption = 'trait to phecode')
trait to phecode
trait phecode
UKB_1200_Sleeplessness_or_insomnia 327.4
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt 452
UKB_20002_1111_self_reported_asthma 495
UKB_20002_1222_self_reported_type_1_diabetes 250.1
UKB_20002_1223_self_reported_type_2_diabetes 250.2
UKB_20002_1223_self_reported_type_2_diabetes 274.1
map_table2 %>% head %>% pander(caption = 'trait to HPO')
trait to HPO
trait HPO
UKB_1200_Sleeplessness_or_insomnia HP:0100785
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt HP:0004936
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt HP:0001977
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt HP:0001907
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt HP:0002639
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt HP:0030243

4 Run silver standard analysis

4.1 Join by phecode

o = silver_standard_perf(score_table, map_table, omim_based_silver_standard, trait_codes = 'phecode')
## Run with silver standard from: phecode_to_omim_to_gene.sh
## Map trait by: phecode
## Mapper chosen: greedy_map
## Extracting all columns of score_table other than "trait" and "gene" as scores
## 2 score columns are used
## # trait-gene pairs in score table before step 2: 6228
## gwas_loci is set to NULL, skip pre-processing step
## # trait-gene pairs in score table after step 2: 6228
o$pr + ggtitle('PR by phecode')

o$roc + ggtitle('ROC by phecode')

o$roc_auc %>% pander('ROC AUC')
ROC AUC
roc_auc score
0.6771 predixcan_mashr_eur_score
0.6732 enloc_score

4.2 Join by HPO

o = silver_standard_perf(score_table, map_table2, omim_based_silver_standard, trait_codes = 'HPO')
## Run with silver standard from: phecode_to_omim_to_gene.sh
## Map trait by: HPO
## Mapper chosen: greedy_map
## Extracting all columns of score_table other than "trait" and "gene" as scores
## 2 score columns are used
## # trait-gene pairs in score table before step 2: 6228
## gwas_loci is set to NULL, skip pre-processing step
## # trait-gene pairs in score table after step 2: 6228
o$pr + ggtitle('PR by HPO')

o$roc + ggtitle('ROC by HPO')

o$roc_auc %>% pander('ROC AUC')
ROC AUC
roc_auc score
0.6771 predixcan_mashr_eur_score
0.6732 enloc_score