|
|
|
|
|
|
|
|
|
|
|
import os |
|
import csv |
|
import math |
|
import xlrd |
|
import pickle |
|
import numpy as np |
|
import pandas as pd |
|
from rdkit import Chem |
|
from Bio import SeqIO |
|
from collections import defaultdict |
|
from scipy import stats |
|
from scipy.stats import ranksums |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import matplotlib.pyplot as plt |
|
from matplotlib import rc |
|
|
|
def _load_kcat_columns(path):
    """Read the tab-separated kcat table at *path* into column lists.

    The first line of the file is skipped as a header.  Every remaining
    non-blank line is split on tabs into ``kcat value``, ``data type`` and
    ``clade``.  Rows typed ``gen`` are labelled ``Generalist`` and rows
    typed ``spe`` are labelled ``Specialist``; rows of any other type are
    dropped after parsing.

    Returns a dict with the keys ``'type'``, ``'clade'`` and
    ``'Kcat_value'``.  All Generalist rows come before all Specialist rows
    so that downstream consumers see the same row order as before.

    Raises ValueError when a value or clade field cannot be parsed.
    """
    label_by_code = {'gen': 'Generalist', 'spe': 'Specialist'}
    rows_by_label = {'Generalist': [], 'Specialist': []}

    with open(path) as infile:
        next(infile, None)  # skip the header line
        for raw_line in infile:
            fields = raw_line.strip().split('\t')
            if fields == ['']:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            kcat_value = float(fields[0])
            data_type = fields[1]
            clade_order = int(fields[2])
            label = label_by_code.get(data_type)
            if label is not None:
                rows_by_label[label].append((clade_order, kcat_value))

    columns = {'type': [], 'clade': [], 'Kcat_value': []}
    for label in ('Generalist', 'Specialist'):
        for clade_order, kcat_value in rows_by_label[label]:
            columns['type'].append(label)
            columns['clade'].append(clade_order)
            columns['Kcat_value'].append(kcat_value)
    return columns


def _print_clade_pvalues(frame, clades):
    """Print the rank-sum p-value (Specialist vs. Generalist) per clade."""
    is_specialist = frame['type'] == 'Specialist'
    is_generalist = frame['type'] == 'Generalist'
    for clade in clades:
        print('This is the clade:', clade)
        in_clade = frame['clade'] == clade
        specialists = frame.loc[in_clade & is_specialist, 'Kcat_value'].tolist()
        generalists = frame.loc[in_clade & is_generalist, 'Kcat_value'].tolist()
        _, p_value = ranksums(specialists, generalists)
        print('The P_value between the two clusters is:', p_value)


def _box_patches(ax):
    """Return the box artists that the boxplot drew on *ax*.

    ``ax.artists`` is used when it is populated.  When it is empty the
    boxes are looked up among the ``PathPatch`` entries of ``ax.patches``
    instead (newer matplotlib releases register them there), so the
    styling below does not silently do nothing or fail on ``[0]``.
    """
    boxes = list(ax.artists)
    if not boxes:
        from matplotlib.patches import PathPatch
        boxes = [patch for patch in ax.patches if isinstance(patch, PathPatch)]
    return boxes


def plot_spe_gen(data_path='../../../BayesianApproach/Results/kcat_gen_spe.txt',
                 figure_path="../../Results/figures/SuppleFig8a.pdf"):
    """Plot per-clade kcat box plots for generalists versus specialists.

    Reads the tab-separated table at *data_path*, prints a Wilcoxon
    rank-sum p-value for clades 1..13, draws a grouped seaborn box plot
    (one Generalist and one Specialist box per clade) and saves it to
    *figure_path*.

    Parameters
    ----------
    data_path : str
        Input table; the first line is treated as a header.
    figure_path : str
        Destination of the saved figure.

    Returns
    -------
    None
    """
    clades = range(1, 14)

    columns = _load_kcat_columns(data_path)
    frame = pd.DataFrame(columns)
    _print_clade_pvalues(frame, clades)

    plt.figure(figsize=(2.5, 2.0))

    rc('font', **{'family': 'serif', 'serif': ['Helvetica']})
    plt.rcParams['pdf.fonttype'] = 42

    plt.axes([0.12, 0.12, 0.83, 0.83])
    plt.tick_params(direction='in', which='major', length=1.5, width=0.4)

    palette = {"Specialist": '#b2182b', "Generalist": '#2166ac'}
    ax = sns.boxplot(data=frame, x="clade", y="Kcat_value", hue="type",
                     palette=palette, showfliers=False, linewidth=0.5)
    ax.set(xlabel=None)

    boxes = _box_patches(ax)
    lines_per_box = 5
    # NOTE(review): the index arithmetic below assumes the boxes alternate
    # Generalist / Specialist and that each box owns five consecutive
    # entries of ax.lines (presumably two whiskers, two caps and the median,
    # since fliers are disabled) - confirm for the installed seaborn version.
    for index, box in enumerate(boxes):
        red, green, blue, _ = box.get_facecolor()
        box.set_facecolor((red, green, blue, 0.3))

        colour = '#2166ac' if index % 2 == 0 else '#b2182b'
        box.set_edgecolor(colour)
        first_line = index * lines_per_box
        for line_index in range(first_line, first_line + lines_per_box):
            box_line = ax.lines[line_index]
            box_line.set_color(colour)
            box_line.set_mfc(colour)
            box_line.set_mec(colour)
    handles = boxes[:2]

    plt.rcParams['font.family'] = 'Helvetica'

    # NOTE: the significance markers are hard-coded; they are not derived
    # from the p-values printed above.
    for position in range(len(clades)):
        plt.text(position - 0.3, 2.95, '***', fontweight="normal", fontsize=6)

    # Raw string: "\m" is not a valid escape sequence in a normal literal.
    plt.ylabel(r"$k$$_\mathregular{cat}$ value", fontname='Helvetica', fontsize=7)

    plt.xticks(rotation=30, ha='right')
    plt.ylim(-2, 5)
    plt.yticks([-2, -1, 0, 1, 2, 3, 4, 5])
    plt.xticks(fontsize=7)
    plt.yticks(fontsize=6)

    for side in ('bottom', 'left', 'top', 'right'):
        ax.spines[side].set_linewidth(0.5)

    labels = plt.gca().get_legend_handles_labels()[1]
    plt.legend(handles[0:2], labels[0:2], loc=1, frameon=False, prop={'size': 6})

    plt.savefig(figure_path, dpi=400, bbox_inches='tight')
|
|
|
|
|
# Script entry point: draw and save the figure only when executed directly.
if __name__ == "__main__":
    plot_spe_gen()
|
|