rm(list = ls())
library(ggplot2)
library(dplyr)
library(pander)
panderOptions('table.split.table', Inf)
Install
Install SilverStandardPerformance
devtools::install_github('liangyy/silver-standard-performance')
## Skipping install of 'SilverStandardPerformance' from a github remote, the SHA1 (acd65c56) has not changed since last install.
## Use `force = TRUE` to force installation
library(SilverStandardPerformance)
Load silver standard
data("omim_based_silver_standard")
# show the silver standard object
omim_based_silver_standard$script_info
## [1] "phecode_to_omim_to_gene.sh"
omim_based_silver_standard$table %>% head %>% pander
ENSG00000133019 |
HP:0000028 |
751.12 |
NA |
ENSG00000133019 |
HP:0000072 |
586.4 |
NA |
ENSG00000133019 |
HP:0000126 |
595 |
NA |
ENSG00000133019 |
HP:0000217 |
527.7 |
NA |
ENSG00000133019 |
HP:0000767 |
756.21 |
NA |
ENSG00000133019 |
HP:0000768 |
756.22 |
NA |
Load example data set
Using the one composed for GTEx GWAS paper.
options(stringsAsFactors = F)
library(dplyr)
dat = read.table('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/fdr_power_specificity/companion_figure_finalized/summary_on_expression_cleanup/logistic-based-test.datamatrix.OMIM-LD-block-PrediXcan-MASH-EUR.tsv', header = T, sep = '\t')
gwas_catelog_to_phecode = read.csv('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/gold-standard/gwas-catalog-to-phecode.csv')
trait_to_gwas_catalog = read.delim2('https://bitbucket.org/yanyul/rotation-at-imlab/raw/85a3fbe8f08df7c67265fed69569b7ea554d4e12/analysis/gold-standard/trait-to-hpo-and-mim.txt', header = T, sep = '\t')
# obtain mapping between trait and phecode
map_trait_to_phecode = list()
for(i in 1 : nrow(trait_to_gwas_catalog)) {
mapped_traits = trait_to_gwas_catalog$mapped_trait[i]
if(is.na(mapped_traits)) {
next
}
catalog_strings = unique(unlist(strsplit(mapped_traits, ';')))
phecodes = gwas_catelog_to_phecode %>% filter(trait %in% catalog_strings) %>% pull(phecode)
phecodes = unique(phecodes)
phecodes = phecodes[phecodes != '-']
if(length(phecodes) == 0) {
next
}
map_trait_to_phecode[[length(map_trait_to_phecode) + 1]] = data.frame(trait = trait_to_gwas_catalog$trait[i], phecode = phecodes)
}
map_trait_to_phecode = do.call(rbind, map_trait_to_phecode)
# obtain mapping between trait and HPO
map_trait_to_hpo = list()
for(i in 1 : nrow(trait_to_gwas_catalog)) {
hpo_str = trait_to_gwas_catalog$hpo[i]
if(is.na(hpo_str)) {
next
}
hpos = unique(unlist(strsplit(hpo_str, ';')))
if(length(hpos) == 0) {
next
}
map_trait_to_hpo[[length(map_trait_to_hpo) + 1]] = data.frame(trait = trait_to_gwas_catalog$trait[i], HPO = paste0('HP:', stringr::str_pad(hpos, 7, pad = "0")))
}
map_trait_to_hpo = do.call(rbind, map_trait_to_hpo)
score_table = dat %>% select(trait, gene, predixcan_mashr_eur_score, enloc_score) %>% distinct()
map_table = map_trait_to_phecode %>% distinct()
map_table2 = map_trait_to_hpo %>% distinct()
Score table
score_table %>% head %>% pander
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000000457 |
2.667 |
0 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000000460 |
13.01 |
0 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000007908 |
17.75 |
0 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000075945 |
4.271 |
0 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000117475 |
3.957 |
0 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
ENSG00000117477 |
40.43 |
0 |
Map table
map_table %>% head %>% pander(caption = 'trait to phecode')
trait to phecode
UKB_1200_Sleeplessness_or_insomnia |
327.4 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
452 |
UKB_20002_1111_self_reported_asthma |
495 |
UKB_20002_1222_self_reported_type_1_diabetes |
250.1 |
UKB_20002_1223_self_reported_type_2_diabetes |
250.2 |
UKB_20002_1223_self_reported_type_2_diabetes |
274.1 |
map_table2 %>% head %>% pander(caption = 'trait to HPO')
trait to HPO
UKB_1200_Sleeplessness_or_insomnia |
HP:0100785 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
HP:0004936 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
HP:0001977 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
HP:0001907 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
HP:0002639 |
UKB_20002_1094_self_reported_deep_venous_thrombosis_dvt |
HP:0030243 |
Run silver standard analysis
Join by phecode
o = silver_standard_perf(score_table, map_table, omim_based_silver_standard, trait_codes = 'phecode')
## Run with silver standard from: phecode_to_omim_to_gene.sh
## Map trait by: phecode
## Mapper chosen: greedy_map
## Extracting all columns of score_table other than "trait" and "gene" as scores
## 2 score columns are used
## # trait-gene pairs in score table before step 2: 6228
## gwas_loci is set to NULL, skip pre-processing step
## # trait-gene pairs in score table after step 2: 6228
o$pr + ggtitle('PR by phecode')
o$roc + ggtitle('ROC by phecode')
o$roc_auc %>% pander('ROC AUC')
ROC AUC
0.6771 |
predixcan_mashr_eur_score |
0.6732 |
enloc_score |
Join by HPO
o = silver_standard_perf(score_table, map_table2, omim_based_silver_standard, trait_codes = 'HPO')
## Run with silver standard from: phecode_to_omim_to_gene.sh
## Map trait by: HPO
## Mapper chosen: greedy_map
## Extracting all columns of score_table other than "trait" and "gene" as scores
## 2 score columns are used
## # trait-gene pairs in score table before step 2: 6228
## gwas_loci is set to NULL, skip pre-processing step
## # trait-gene pairs in score table after step 2: 6228
o$pr + ggtitle('PR by HPO')
o$roc + ggtitle('ROC by HPO')
o$roc_auc %>% pander('ROC AUC')
ROC AUC
0.6771 |
predixcan_mashr_eur_score |
0.6732 |
enloc_score |