|
|
|
|
|
|
|
|
|
|
|
import math |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from matplotlib import rc |
|
from scipy import stats |
|
import seaborn as sns |
|
import pandas as pd |
|
from scipy.stats import ranksums |
|
|
|
|
|
def median(lst):
    """Return the median of the values in ``lst``.

    The input is not modified; a sorted copy is used. For an odd number of
    values the middle element is returned unchanged. For an even number of
    values the arithmetic mean of the two middle elements is returned as a
    float.

    Args:
        lst: Iterable of mutually comparable numeric values.

    Returns:
        The median value.

    Raises:
        ValueError: If ``lst`` contains no values.
    """
    ordered = sorted(lst)
    # Measure the sorted copy so that any iterable (not only sized
    # sequences) is accepted.
    count = len(ordered)
    if count == 0:
        raise ValueError("median() requires at least one value")

    # For an even count this is the lower of the two middle positions.
    middle = (count - 1) // 2
    if count % 2:
        return ordered[middle]
    return (ordered[middle] + ordered[middle + 1]) / 2.0
|
|
|
def _load_substrate_values(path):
    """Read the tab-separated input file and group values by substrate type.

    The first line of the file is skipped as a header. Every other non-blank
    line must have at least three tab-separated fields; the second field is
    parsed as a float and the third is the substrate type. Rows whose type is
    not 'Preferred', 'Alternative' or 'Random' are ignored.

    Args:
        path: Path of the input text file.

    Returns:
        A ``(values_by_type, alldata)`` tuple. ``values_by_type`` maps each
        substrate type to its list of floats; ``alldata`` is a long-format
        dict with parallel 'type' and 'value' lists in file order.
    """
    values_by_type = {'Preferred': [], 'Alternative': [], 'Random': []}
    alldata = {'type': [], 'value': []}

    with open(path, 'r') as infile:
        lines = infile.readlines()

    for line in lines[1:]:
        fields = line.strip().split('\t')
        if fields == ['']:
            # Blank line (e.g. trailing newline at end of file): nothing to parse.
            continue

        value, substrate_type = fields[1], fields[2]
        if substrate_type not in values_by_type:
            continue

        number = float(value)
        values_by_type[substrate_type].append(number)
        alldata['type'].append(substrate_type)
        alldata['value'].append(number)

    return values_by_type, alldata


def _print_summary(preferred_substrates, alternative_substrates, random_substrates):
    """Print counts, medians, back-transformed medians and rank-sum p-values.

    The "real value" lines report ``10 ** median``, i.e. the values are
    treated as log10-scaled.
    """
    # ranksums returns (statistic, pvalue); only the p-value is reported.
    p_value_1 = ranksums(preferred_substrates, alternative_substrates)[1]
    p_value_2 = ranksums(preferred_substrates, random_substrates)[1]
    p_value_3 = ranksums(alternative_substrates, random_substrates)[1]

    print('The amount of preferred_substrates:', len(preferred_substrates))
    print('The amount of alternative_substrates:', len(alternative_substrates))
    print('The amount of random_substrates:', len(random_substrates))
    print('The median value of preferred_substrates: %.4f' % median(preferred_substrates))
    print('The median value of alternative_substrates: %.4f' % median(alternative_substrates))
    print('The median value of random_substrates: %.4f' % median(random_substrates))
    print('The real value of preferred substrates: %.2f' % pow(10, median(preferred_substrates)))
    print('The real value of alternative substrates: %.2f' % pow(10, median(alternative_substrates)))
    print('The real value of random substrates: %.2f' % pow(10, median(random_substrates)))
    print('P value between preferred_substrates and alternative_substrates is: %s' % p_value_1)
    print('P value between preferred_substrates and random_substrates is: %s' % p_value_2)
    print('P value between alternative_substrates and random_substrates is: %s' % p_value_3)


def _plot_substrate_boxplot(alldata, output_path):
    """Draw the box plot with overlaid strip plot and save it to ``output_path``.

    Args:
        alldata: Dict with parallel 'type' and 'value' lists.
        output_path: Destination file for the figure.
    """
    category_order = ["Random", "Alternative", "Preferred"]
    palette = {"Random": '#FF8C00', "Alternative": '#2166ac', "Preferred": '#b2182b'}
    frame = pd.DataFrame(alldata)

    plt.figure(figsize=(1.5, 1.5))

    rc('font', **{'family': 'serif', 'serif': ['Helvetica']})
    plt.rcParams['pdf.fonttype'] = 42

    plt.axes([0.12, 0.12, 0.83, 0.83])

    plt.tick_params(direction='in')
    plt.tick_params(which='major', length=1.5, width=0.4)

    ax = sns.boxplot(data=frame, x="type", y="value", order=category_order,
                     palette=palette, showfliers=False, linewidth=0.5, width=0.5)
    ax = sns.stripplot(data=frame, x="type", y="value", order=category_order,
                       palette=palette, size=0.7)

    ax.set(xlabel=None)

    # Older matplotlib exposes the box patches through ax.artists; since
    # matplotlib 3.5 they are listed in ax.patches and ax.artists is empty.
    boxes = list(ax.artists) or list(ax.patches)

    for i, box in enumerate(boxes):
        # Make the box fill translucent while keeping its palette colour.
        r, g, b, _ = box.get_facecolor()
        box.set_facecolor((r, g, b, 0.3))

        # Boxes are drawn in category order, so the index selects the colour.
        col = palette[category_order[i % 3]]
        box.set_edgecolor(col)

        # With fliers hidden each box contributes five Line2D objects
        # (whiskers, caps and the median line); recolour them to match.
        for j in range(i * 5, i * 5 + 5):
            line = ax.lines[j]
            line.set_color(col)
            line.set_mfc(col)
            line.set_mec(col)

    plt.rcParams['font.family'] = 'Helvetica'

    # Raw string: "\m" is not a valid Python escape sequence.
    plt.ylabel(r"Predicted $k$$_\mathregular{cat}$ value [log10]", fontname='Helvetica', fontsize=7)

    plt.yticks([-2, 0, 2, 4, 6])

    plt.xticks(fontsize=7, rotation=30, ha='right')
    plt.yticks(fontsize=6)

    for side in ('bottom', 'left', 'top', 'right'):
        ax.spines[side].set_linewidth(0.5)

    plt.savefig(output_path, dpi=400, bbox_inches='tight')


def main():
    """Compare values for preferred, alternative and random substrates.

    Reads the tab-separated input file, prints summary statistics and
    Wilcoxon rank-sum p-values for the three substrate groups, and writes a
    box plot of the values to ``../../Results/figures/SuppleFig5d.pdf``.
    """
    values_by_type, alldata = _load_substrate_values(
        '../../Data/enzyme_promiscuity/test_preferred_alternative_random.txt')

    _print_summary(values_by_type['Preferred'],
                   values_by_type['Alternative'],
                   values_by_type['Random'])

    _plot_substrate_boxplot(alldata, "../../Results/figures/SuppleFig5d.pdf")
|
|
|
|
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|