File size: 3,416 Bytes
7718235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# ---- Configuration and accumulators ----

# Genes to process. The loop below visits them in this order, so this is also
# the order in which panels are appended to `all.plots`.
genes <- c('PTEN', 'NUDT15', 'SNCA', 'CYP2C9', 'GCK', 'ASPA', 'CCR5', 'CXCR4')

# For each gene (positionally aligned with `genes`), the n of the `score.n`
# column that holds its stability assay.
stab.assay <- c(1, 1, 2, 2, 2, 1, 1, 1)

# Per-gene lookup from score column name to the assay label used as the axis
# title in the scatter plots. VKORC1 has an entry but is not listed in `genes`,
# so it is never plotted by this script.
task.dic <- list("PTEN"=c("score.1"="stability", "score.2"="enzyme.activity"), 
                 "NUDT15"=c("score.1"="stability", "score.2"="enzyme.activity"), 
                 "VKORC1"=c("score.1"="enzyme.activity", "score.2"="stability"), 
                 "CCR5"=c("score.1"="stability", "score.2"="binding Ab2D7", "score.3"="binding HIV-1"), 
                 "CXCR4"=c("score.1"="stability", "score.2"="binding CXCL12", "score.3"="binding Ab12G5"),
                 "SNCA"=c("score.1"="enzyme.activity", "score.2"="stability"),
                 "CYP2C9"=c("score.1"="enzyme.activity", "score.2"="stability"),
                 "GCK"=c("score.1"="enzyme.activity", "score.2"="stability"),
                 "ASPA"=c("score.1"="stability", "score.2"="enzyme.activity")
)

# `genes`, `stab.assay` and `task.dic` are maintained by hand and only linked
# by position/name. Fail early if they drift apart: every gene needs a
# dictionary entry, and the column named by `stab.assay` must be the one
# labelled "stability".
stopifnot(
  length(stab.assay) == length(genes),
  all(genes %in% names(task.dic)),
  all(vapply(seq_along(genes), function(j) {
    task.dic[[genes[j]]][[paste0("score.", stab.assay[j])]] == "stability"
  }, logical(1)))
)

# Accumulators filled by the per-gene loop.
result <- NULL        # one row per gene: HGNC, stab.corr, other.corr
sp.stats <- NULL      # Spearman estimate for each panel
pr.stats <- NULL      # Pearson estimate for each panel
all.plots <- list()   # one ggplot object per panel
k <- 1                # index of the next panel to be written
# ggplot(), aes(), geom_*(), stat_smooth(), ggtitle(), xlab(), ylab() and
# theme_bw() are called unqualified below, so ggplot2 must be attached before
# the loop runs.
library(ggplot2)

# For every gene:
#   1. read its annotated assay table,
#   2. record how strongly the stability score tracks the predicted ddG,
#   3. draw one scatter panel (stability score vs. each other score) annotated
#      with the Pearson and Spearman coefficients.
# Appends to `result`, `sp.stats`, `pr.stats` and `all.plots`, and advances `k`.
for (i in seq_along(genes)) {
  gene <- genes[i]
  assay <- read.csv(paste0('../data.files/', gene, '/ALL.annotated.csv'))

  # Split the score columns into the stability assay and everything else.
  stab.score.columns <- paste0('score.', stab.assay[i])
  score.columns <- colnames(assay)[startsWith(colnames(assay), 'score')]
  other.score.columns <- score.columns[!score.columns %in% stab.score.columns]

  # test the correlation between stab and foldx_ddG
  # unname(): cor.test() labels its estimate "cor"; dropping the label keeps it
  # from being carried into the row names of `result`.
  stab.corr <- unname(abs(cor.test(assay$FoldXddG, assay[[stab.score.columns]])$estimate))

  # NOTE(review): the stability score above is compared against FoldXddG, but
  # the other scores are compared against RosettaddG. Left as found -- confirm
  # whether the two predictors are meant to differ.
  other.corr <- vapply(
    other.score.columns,
    function(score.col) {
      abs(cor.test(assay$RosettaddG, assay[[score.col]])$estimate)
    },
    numeric(1)
  )
  other.corr <- mean(other.corr, na.rm = TRUE)

  result <- rbind(result,
                  data.frame(HGNC = gene,
                             stab.corr = stab.corr,
                             other.corr = other.corr))

  if (gene == 'ASPA') {
    # ASPA only: flip the sign of the non-stability scores before plotting and
    # move the annotation to the bottom-right corner. (The correlations stored
    # in `result` were taken as absolute values, so they are unaffected.)
    assay[,other.score.columns] <- -assay[,other.score.columns]
    x.pos <- 'right'
    y.pos <- 'bottom'
  } else {
    x.pos <- 'left'
    y.pos <- 'top'
  }

  # plot scatter plot of stability and other assay
  for (score.col in other.score.columns) {
    sp.stats[k] <- cor.test(assay[[stab.score.columns]],
                            assay[[score.col]], method = 'spearman')$estimate
    pr.stats[k] <- cor.test(assay[[stab.score.columns]],
                            assay[[score.col]], method = 'pearson')$estimate

    # Annotation text and its normalised-parent-coordinate position.
    corr.label <- data.frame(
      x = x.pos, y = y.pos,
      label = paste0("Pearson r=", signif(pr.stats[k], digits = 2),
                     "\nSpearman rho=", signif(sp.stats[k], digits = 2))
    )

    # `!!` inlines the column names now. Without it aes() would look the loop
    # variables up only when the plot is rendered, i.e. after the loop has
    # finished, and every stored panel would use the last iteration's columns.
    p <- ggplot(assay, aes(x = .data[[!!stab.score.columns]],
                           y = .data[[!!score.col]])) +
      geom_point(alpha = 0.2, color = 'grey') +
      geom_density_2d(color = 'gray1') +
      stat_smooth(method = "lm", formula = y~x, color = 'blue') +
      ggpp::geom_text_npc(data = corr.label,
                          aes(npcx = x, npcy = y, label = label),
                          col = 'black') +
      ggtitle(gene) +
      xlab(task.dic[[gene]][stab.score.columns]) +
      ylab(task.dic[[gene]][score.col]) +
      theme_bw() + ggeasy::easy_center_title()

    all.plots[[k]] <- p
    k <- k + 1
  }
}
# make plot
# Arrange the first ten panels of `all.plots` as three stacked rows (3, 3 and
# 4 panels) and write the result to a PDF.
library(patchwork)

# The layout below indexes panels 1..10 directly; report a clear error rather
# than "subscript out of bounds" when the loop produced fewer panels.
n.panels <- 10
if (length(all.plots) < n.panels) {
  stop("expected at least ", n.panels, " panels in all.plots, found ",
       length(all.plots), call. = FALSE)
}

row.1 <- all.plots[[1]] + all.plots[[2]] + all.plots[[3]]
row.2 <- all.plots[[4]] + all.plots[[5]] + all.plots[[6]]
row.3 <- all.plots[[7]] + all.plots[[8]] + all.plots[[9]] + all.plots[[10]] +
  plot_layout(ncol = 4)
p <- row.1 / row.2 / row.3

# Make sure the output directory exists before saving into it.
fig.path <- 'figs/fig.sup.3.pdf'
dir.create(dirname(fig.path), showWarnings = FALSE, recursive = TRUE)
# Namespace-qualified so saving does not depend on ggplot2 being attached.
ggplot2::ggsave(fig.path, p, height = 10, width = 10)