File size: 4,369 Bytes
2d12bc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/python
# coding: utf-8

# Author: LE YUAN

import os
import math
import json
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib import rc
import seaborn as sns
import pandas as pd
from scipy.stats import gaussian_kde
from scipy import stats
from sklearn.metrics import mean_squared_error,r2_score


def _load_value_pairs(path):
    """Read (experimental, predicted) value pairs from a tab-separated file.

    The first line of the file is treated as a header and skipped. On every
    remaining non-blank line, column 0 is parsed as the experimental value
    and column 1 as the predicted value.

    Args:
        path: Path of the tab-separated text file to read.

    Returns:
        A tuple ``(experimental_values, predicted_values)`` of two float
        lists with equal length.

    Raises:
        IndexError: If a data line has fewer than two columns.
        ValueError: If a column cannot be parsed as a float.
    """
    experimental_values = []
    predicted_values = []
    with open(path, 'r') as testfile:
        next(testfile, None)  # first line is the header, not data
        for data in testfile:
            # A trailing or stray blank line carries no data point; skip it
            # instead of failing on float('').
            if not data.strip():
                continue
            line = data.strip().split('\t')
            experimental_values.append(float(line[0]))
            predicted_values.append(float(line[1]))
    return experimental_values, predicted_values


def _plot_density_scatter(experimental_values, predicted_values,
                          correlation, p_value, output_path):
    """Draw the density-coloured experimental-vs-predicted scatter plot.

    Args:
        experimental_values: Sequence of floats plotted on the x axis.
        predicted_values: Sequence of floats plotted on the y axis.
        correlation: Pearson r, written into the figure annotation.
        p_value: P value of the correlation, written into the annotation.
        output_path: File path handed to ``plt.savefig``.
    """
    plt.figure(figsize=(1.5, 1.5))

    # To solve the 'Helvetica' font cannot be used in PDF file
    # https://stackoverflow.com/questions/59845568/the-pdf-backend-does-not-currently-support-the-selected-font
    rc('font', **{'family': 'serif', 'serif': ['Helvetica']})
    plt.rcParams['pdf.fonttype'] = 42

    plt.axes([0.12, 0.12, 0.83, 0.83])
    plt.tick_params(which='major', direction='in', length=1.5, width=0.4)

    # Colour every point by its local density (Gaussian KDE evaluated at the
    # points themselves).
    # http://showteeth.tech/posts/24328.html
    # https://stackoverflow.com/questions/49662964/density-scatter-plot-for-huge-dataset-in-matplotlib
    kcat_values_vstack = np.vstack([experimental_values, predicted_values])
    point_density = gaussian_kde(kcat_values_vstack)(kcat_values_vstack)

    # edgecolors='none' is the documented way to draw markers without an
    # outline (the previous empty-list value relied on undocumented behaviour).
    points = plt.scatter(x=experimental_values, y=predicted_values,
                         c=point_density, s=3, edgecolors='none')

    # https://stackoverflow.com/questions/53935805/specify-range-of-colors-for-density-plot-in-matplotlib
    cbar = plt.colorbar(points)
    cbar.ax.tick_params(labelsize=6)
    cbar.set_label('Density', size=7)

    # Annotate with the statistics of the data actually plotted rather than
    # values copied from an earlier run.
    plt.text(-4.7, 6.9, 'r = %.2f' % correlation, fontweight="normal", fontsize=6)
    plt.text(-4.7, 5.9, 'P value = %.1e' % p_value, fontweight="normal", fontsize=6)
    plt.text(-4.7, 4.8, 'N = %d' % len(experimental_values), fontweight="normal", fontsize=6)

    # Switched only here so it applies to text created from this point on;
    # keep this statement after the annotations and before the axis labels.
    plt.rcParams['font.family'] = 'Helvetica'

    label_font = {'weight': 'normal', 'fontname': 'Helvetica', 'size': 7}
    # Raw strings: '\m' is not a valid Python escape sequence and must reach
    # mathtext as a literal backslash.
    plt.xlabel(r"Experimental $k$$_\mathregular{cat}$ value", fontdict=label_font, fontsize=7)
    plt.ylabel(r'Predicted $k$$_\mathregular{cat}$ value', fontdict=label_font, fontsize=7)

    tick_positions = [-6, -4, -2, 0, 2, 4, 6, 8]
    plt.xticks(tick_positions, fontsize=6)
    plt.yticks(tick_positions, fontsize=6)

    axes = plt.gca()
    for side in ('bottom', 'left', 'top', 'right'):
        axes.spines[side].set_linewidth(0.5)

    plt.savefig(output_path, dpi=400, bbox_inches='tight')
    plt.close()  # release the figure once it has been written


def main():
    """Evaluate test-set predictions and save the density scatter figure.

    Reads experimental/predicted value pairs from
    ``../../Data/test_dataset/test_out.txt``, prints the number of points,
    Pearson r, its p value, R2 and RMSE, and writes the scatter plot to
    ``../../Results/figures/SuppleFig5a.pdf``.

    Raises:
        ValueError: If the input file holds fewer than two data points.
    """
    experimental_values, predicted_values = _load_value_pairs(
        '../../Data/test_dataset/test_out.txt')
    number = len(experimental_values)
    if number < 2:
        raise ValueError(
            'Need at least two data points to compute a correlation, got %d' % number)

    correlation, p_value = stats.pearsonr(experimental_values, predicted_values)
    r2 = r2_score(experimental_values, predicted_values)
    rmse = np.sqrt(mean_squared_error(experimental_values, predicted_values))

    print('The data point number: %s' % number)
    print('r is: %.2f' % correlation)
    print('p value is: %s' % p_value)
    print('R2 is: %.2f' % r2)
    print('RMSE is: %.2f' % rmse)
    print('\n')

    # Results recorded by the author for the original test set:
    # The data point number: 1684
    # r is: 0.71
    # p value is: 1.4624026012794328e-262
    # R2 is: 0.51
    # RMSE is: 1.05

    _plot_density_scatter(experimental_values, predicted_values,
                          correlation, p_value,
                          "../../Results/figures/SuppleFig5a.pdf")

# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()