Overview
ggplot(variant) + geom_point(aes(x = Alt, y = Ref, color = log10(Odds.Ratio))) + facet_grid(.~Imbalance) + geom_abline(intercept = 0.07, slope = 1) + geom_abline(intercept = -0.07, slope = 1) + ggtitle('The Predicted Effect of \n Heterozygous sites in DNase-seq of E081') + scale_color_gradient2(midpoint=0, low="blue", mid="white", high="red", space ="Lab" )

Observed Log Odds Ratio vs. Predicted Log Odds Ratio
variant$logit.raf <- logit(variant$af)
variant$log10.oddsratio.predicted <- logit(variant$Ref) - logit(variant$Alt)
variant$logit.raf.predicted <- logit(variant$Ref / (variant$Ref + variant$Alt))
model.linear = lm(logit.raf ~ 0 + logit.raf.predicted, data = variant)
ggplot(variant, aes(x = logit.raf.predicted, y = logit.raf)) + geom_point(aes(color = Imbalance)) + geom_smooth(method='lm',formula=y~0+x) + annotate("text", x = 0.4, y = -1, label = lm_eqn(model.linear), parse = TRUE) + ggtitle('Observed Reference Allele Frequency vs \n Predicted RAF')

Predictive power of predicted p(ref) and p(alt)
Model: imbalance ~ f(ref, alt), link = binomial. \(f_1 = |x - y|\). \(f_2 = |\log_{10}(x/(1-x)) - \log_{10}(y/(1-y))|\). \(f_3 = logit(\frac{p_0}{p_0 + p_1}) = \log_{10} \frac{p_0}{p_1}\)
variant.pass.interest <- variant[variant$Imbalance != 'Unsure',]
model.logistic.abs <- glm(as.numeric(Imbalance == 'Imbalanced') ~ abs(Ref - Alt), family=binomial(link='logit'), data=variant.pass.interest)
model.logistic.lor <- glm(as.numeric(Imbalance == 'Imbalanced') ~ abs(log10.oddsratio.predicted), family=binomial(link='logit'), data=variant.pass.interest)
model.logistic.logitraf <- glm(as.numeric(Imbalance == 'Imbalanced') ~ abs(logit.raf.predicted), family=binomial(link='logit'), data=variant.pass.interest)
library(pander)
panderOptions('knitr.auto.asis', FALSE)
pander(model.logistic.abs)
Fitting generalized (binomial/logit) linear model: as.numeric(Imbalance == “Imbalanced”) ~ abs(Ref - Alt)
abs(Ref - Alt) |
6.96 |
1.179 |
5.902 |
3.599e-09 |
(Intercept) |
-1.056 |
0.04628 |
-22.83 |
2.401e-115 |
pander(model.logistic.lor)
Fitting generalized (binomial/logit) linear model: as.numeric(Imbalance == “Imbalanced”) ~ abs(log10.oddsratio.predicted)
abs(log10.oddsratio.predicted) |
3.099 |
0.4461 |
6.947 |
3.722e-12 |
(Intercept) |
-1.166 |
0.05408 |
-21.56 |
4.026e-103 |
pander(model.logistic.logitraf)
Fitting generalized (binomial/logit) linear model: as.numeric(Imbalance == “Imbalanced”) ~ abs(logit.raf.predicted)
abs(logit.raf.predicted) |
9.13 |
0.7761 |
11.76 |
5.975e-32 |
(Intercept) |
-1.265 |
0.04968 |
-25.46 |
6.049e-143 |
library(precrec)
joined <- join_scores(model.logistic.abs$fitted.values, model.logistic.lor$fitted.values, model.logistic.logitraf$fitted.values)
msmdat <- mmdata(joined, as.numeric(variant.pass.interest$Imbalance == 'Imbalanced'), modnames = c('f1', 'f2', 'f3'))
mscurves <- evalmod(msmdat)
plot(mscurves)

pander(auc(mscurves))
f1 |
1 |
ROC |
0.58 |
f1 |
1 |
PRC |
0.3507 |
f2 |
1 |
ROC |
0.5636 |
f2 |
1 |
PRC |
0.3586 |
f3 |
1 |
ROC |
0.6673 |
f3 |
1 |
PRC |
0.435 |