Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +1 -0
Streamlit/Untitled.ipynb +293 -0
Streamlit/header.png +0 -0
Streamlit/main.py +205 -0
Streamlit/streamlit-main-2022-10-10-17-10-73.webm +3 -0
Streamlit/test_run_file.py +176 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Streamlit/streamlit-main-2022-10-10-17-10-73.webm filter=lfs diff=lfs merge=lfs -text

Streamlit/Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,293 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'rdkit'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn [1], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chem\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mChem\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AllChem\n\u001b[1;32m      6\u001b[0m \u001b[38;5;66;03m# from rdkit.Chem import Draw\u001b[39;00m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'rdkit'"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import AllChem\n",
+    "# from rdkit.Chem import Draw\n",
+    "from rdkit.Chem import rdChemReactions as Reactions\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "from keras.preprocessing import sequence\n",
+    "from keras.utils import pad_sequences\n",
+    "import keras\n",
+    "from keras import backend as K\n",
+    "from keras.models import load_model\n",
+    "import argparse\n",
+    "import h5py\n",
+    "import pdb\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "seq_rdic = ['A', 'I', 'L', 'V', 'F', 'W', 'Y', 'N', 'C', 'Q', 'M','S', 'T', 'D', 'E', 'R', 'H', 'K', 'G', 'P', 'O', 'U', 'X', 'B', 'Z']\n",
+    "seq_dic = {w: i+1 for i, w in enumerate(seq_rdic)}\n",
+    "\n",
+    "\n",
+    "def encodeSeq(seq, seq_dic):\n",
+    "    if pd.isnull(seq):\n",
+    "        return [0]\n",
+    "    else:\n",
+    "        return [seq_dic[aa] for aa in seq]\n",
+    "\n",
+    "\n",
+    "def load_modelfile(model_string):\n",
+    "\tloaded_model = tf.keras.models.load_model(model_string)\n",
+    "\treturn loaded_model\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'load_modelfile' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn [4], line 80\u001b[0m\n\u001b[1;32m     72\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m prediction_vals[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m     75\u001b[0m \u001b[38;5;66;03m# loaded_model = load_modelfile('./../CNN_results/model_final.model')\u001b[39;00m\n\u001b[1;32m     76\u001b[0m \n\u001b[1;32m     77\u001b[0m \u001b[38;5;66;03m# KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')\u001b[39;00m\n\u001b[1;32m     78\u001b[0m \u001b[38;5;66;03m# kegg_df = KEGG_compound_read.reset_index()\u001b[39;00m\n\u001b[0;32m---> 80\u001b[0m loaded_model \u001b[38;5;241m=\u001b[39m load_modelfile(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./../CNN_results_split_final/Final_model.model\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     81\u001b[0m KEGG_compound_read \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./../CNN_data/Final_test/kegg_compound.csv\u001b[39m\u001b[38;5;124m'\u001b[39m, index_col \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCompound_ID\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     82\u001b[0m kegg_df \u001b[38;5;241m=\u001b[39m KEGG_compound_read\u001b[38;5;241m.\u001b[39mreset_index()\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'load_modelfile' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "def prot_feature_gen_from_str_input(prot_input_str, prot_len=2500):\n",
+    "    Prot_ID = prot_input_str.split(':')[0]\n",
+    "    Prot_seq = prot_input_str.split(':')[1]\n",
+    "    prot_dataframe = pd.DataFrame(\n",
+    "        {'Protein_ID': Prot_ID, 'Sequence': Prot_seq}, index=[0])\n",
+    "    prot_dataframe.set_index('Protein_ID')\n",
+    "\n",
+    "    prot_dataframe[\"encoded_sequence\"] = prot_dataframe.Sequence.map(\n",
+    "        lambda a: encodeSeq(a, seq_dic))\n",
+    "    prot_feature = pad_sequences(\n",
+    "        prot_dataframe[\"encoded_sequence\"].values, prot_len)\n",
+    "\n",
+    "    return prot_feature, Prot_ID\n",
+    "\n",
+    "\n",
+    "def mol_feature_gen_from_str_input(mol_str, kegg_id_flag, kegg_df):\n",
+    "\n",
+    "\tif kegg_id_flag == 1:\n",
+    "\t\tKEGG_ID = mol_str\n",
+    "\t\tkegg_id_loc = kegg_df.index[kegg_df.Compound_ID == KEGG_ID][0]\n",
+    "\t\tKEGG_ID_info = kegg_df.loc[kegg_id_loc]\n",
+    "\t\tKEGG_ID_info_df = KEGG_ID_info.to_frame().T.set_index('Compound_ID')\n",
+    "\n",
+    "\t\tfinal_return = KEGG_ID_info_df\n",
+    "\t\tfinal_id = KEGG_ID\n",
+    "\n",
+    "\telse:\n",
+    "\t\ttry:\n",
+    "\t\t\tmol_ID = mol_str.split(':')[0]\n",
+    "\t\t\tmol_smiles = mol_str.split(':')[1]\n",
+    "\t\t\tmol = Chem.MolFromSmiles(mol_smiles)\n",
+    "\t\t\tfp1 = AllChem.GetMorganFingerprintAsBitVect(\n",
+    "\t\t\t    mol, useChirality=True, radius=2, nBits=2048)\n",
+    "\t\t\tfp_list = list(np.array(fp1).astype(float))\n",
+    "\t\t\tfp_str = list(map(str, fp_list))\n",
+    "\t\t\tmol_fp = '\\t'.join(fp_str)\n",
+    "\n",
+    "\t\t\tmol_dict = {}\n",
+    "\t\t\tmol_dict['Compound_ID'] = mol_ID\n",
+    "\t\t\tmol_dict['Smiles'] = mol_smiles\n",
+    "\t\t\tmol_dict['morgan_fp_r2'] = mol_fp\n",
+    "\n",
+    "\t\t\tmol_info_df = pd.DataFrame(mol_dict, index=[0])\n",
+    "\t\t\tmol_info_df.set_index('Compound_ID')\n",
+    "\n",
+    "\t\t\tfinal_return = mol_info_df\n",
+    "\t\t\tfinal_id = mol_ID\n",
+    "\n",
+    "\t\texcept Exception as error:\n",
+    "\t\t\tprint('Something wrong with molecule input string...' + repr(error))\n",
+    "\n",
+    "\treturn final_return, final_id\n",
+    "\n",
+    "\n",
+    "def act_df_gen_mol_feature(mol_id, prot_id):\n",
+    "\tact_df = pd.DataFrame(\n",
+    "\t    {'Protein_ID': prot_id, 'Compound_ID': mol_id}, index=[0])\n",
+    "\n",
+    "\treturn act_df\n",
+    "\n",
+    "\n",
+    "def compound_feature_gen_df_input(act_df, comp_df, comp_len=2048, comp_vec='morgan_fp_r2'):\n",
+    "\tact_df = pd.merge(act_df, comp_df, left_on='Compound_ID', right_index=True)\n",
+    "\tcomp_feature = np.stack(act_df[comp_vec].map(lambda fp: fp.split(\"\\t\")))\n",
+    "\tcomp_feature = comp_feature.astype('float')\n",
+    "\treturn comp_feature\n",
+    "\n",
+    "\n",
+    "def model_prediction(compound_feature, enz_feature, model):\n",
+    "    prediction_vals = model.predict([compound_feature, enz_feature])\n",
+    "\n",
+    "    return prediction_vals[0][0]\n",
+    "\n",
+    "\n",
+    "# loaded_model = load_modelfile('./../CNN_results/model_final.model')\n",
+    "\n",
+    "# KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')\n",
+    "# kegg_df = KEGG_compound_read.reset_index()\n",
+    "\n",
+    "loaded_model = load_modelfile('./../CNN_results_split_final/Final_model.model')\n",
+    "KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')\n",
+    "kegg_df = KEGG_compound_read.reset_index()\n",
+    "\n",
+    "\n",
+    "# def img_to_bytes(img_path):\n",
+    "#     img_bytes = Path(img_path).read_bytes()\n",
+    "#     encoded = base64.b64encode(img_bytes).decode()\n",
+    "#     return encoded\n",
+    "# # st.title('dGPredictor')\n",
+    "\n",
+    "# header_html = \"<img src='../figures/header.png'>\"\n",
+    "\n",
+    "# st.markdown(\n",
+    "#     header_html, unsafe_allow_html=True,\n",
+    "# )\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Error somewhere...NameError(\"name 'prot_feature_gen_from_str_input' is not defined\")\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'compound_feature1' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn [3], line 16\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m     14\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError somewhere...\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mrepr\u001b[39m(e))\n\u001b[0;32m---> 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mtype\u001b[39m(compound_feature1))\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'compound_feature1' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "enz_str =\"A0A4P8WFA8:MTKRVLVTGGAGFLGSHLCERLLSEGHEVICLDNFGSGRRKNIKEFEDHPSFKVNDRDVRISESLPSVDRIYHLASRASPADFTQFPVNIALANTQGTRRLLDQARACDARMVFASTSEVYGDPKVHPQPETYTGNVNIRGARGCYDESKRFGETLTVAYQRKYDVDARTVRIFNTYGPRMRPDDGRVVPTFVTQALRGDDLTIYGDGEQTRSFCYVDDLIEGLISLMRVDNPEHNVYNIGKENERTIKELAYEVLGLTDTESDIVYEPLPEDDPGQRRPDITRAKTELDWEPKISLREGLEDTITYFDN\"\n",
+    "\n",
+    "comp_str = 'C00149:O[C@@H](CC([O-])=O)C([O-])=O'\n",
+    "try:\n",
+    "    prot_feature, prot_id = prot_feature_gen_from_str_input(enz_str)\n",
+    "    kegg_id_flag = 0\n",
+    "    comp_feature, comp_id = mol_feature_gen_from_str_input(comp_str, kegg_id_flag, kegg_df)\n",
+    "\n",
+    "    act_dataframe = act_df_gen_mol_feature(comp_id, prot_id)\n",
+    "    # pdb.set_trace()\n",
+    "    compound_feature1 = compound_feature_gen_df_input(act_dataframe, comp_feature)\n",
+    "\n",
+    "except Exception as e:\n",
+    "    print('Error somewhere...' + repr(e))\n",
+    "\n",
+    "print(type(compound_feature1))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1/1 [==============================] - 0s 223ms/step\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "EnzRankScore = model_prediction(compound_feature1, prot_feature, loaded_model)\n",
+    "es = EnzRankScore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9315796"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "es"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

Streamlit/header.png ADDED Viewed

Streamlit/main.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import re
+from PIL import Image
+import webbrowser
+from rdkit import Chem
+from rdkit.Chem import AllChem
+from rdkit.Chem import Draw
+from rdkit.Chem import rdChemReactions as Reactions
+import tensorflow as tf
+from tensorflow import keras
+from keras.preprocessing import sequence
+from keras.utils import pad_sequences
+import keras
+from keras import backend as K
+from keras.models import load_model
+import argparse
+import h5py
+import pdb
+seq_rdic = ['A', 'I', 'L', 'V', 'F', 'W', 'Y', 'N', 'C', 'Q', 'M',
+    'S', 'T', 'D', 'E', 'R', 'H', 'K', 'G', 'P', 'O', 'U', 'X', 'B', 'Z']
+seq_dic = {w: i+1 for i, w in enumerate(seq_rdic)}
+@st.cache(allow_output_mutation=True)
+def encodeSeq(seq, seq_dic):
+    if pd.isnull(seq):
+        return [0]
+    else:
+        return [seq_dic[aa] for aa in seq]
+@st.cache(allow_output_mutation=True)
+def load_modelfile(model_string):
+	loaded_model = tf.keras.models.load_model(model_string)
+	return loaded_model
+@st.cache(allow_output_mutation=True)
+def prot_feature_gen_from_str_input(prot_input_str, prot_len=2500):
+    Prot_ID = prot_input_str.split(':')[0]
+    Prot_seq = prot_input_str.split(':')[1]
+    prot_dataframe = pd.DataFrame(
+        {'Protein_ID': Prot_ID, 'Sequence': Prot_seq}, index=[0])
+    prot_dataframe.set_index('Protein_ID')
+    prot_dataframe["encoded_sequence"] = prot_dataframe.Sequence.map(
+        lambda a: encodeSeq(a, seq_dic))
+    prot_feature = pad_sequences(
+        prot_dataframe["encoded_sequence"].values, prot_len)
+    return prot_feature, Prot_ID
+@st.cache(allow_output_mutation=True)
+def mol_feature_gen_from_str_input(mol_str, kegg_id_flag, kegg_df):
+	if kegg_id_flag == 1:
+		KEGG_ID = mol_str
+		kegg_id_loc = kegg_df.index[kegg_df.Compound_ID == KEGG_ID][0]
+		KEGG_ID_info = kegg_df.loc[kegg_id_loc]
+		KEGG_ID_info_df = KEGG_ID_info.to_frame().T.set_index('Compound_ID')
+		final_return = KEGG_ID_info_df
+		final_id = KEGG_ID
+	else:
+		try:
+			mol_ID = mol_str.split(':')[0]
+			mol_smiles = mol_str.split(':')[1]
+			mol = Chem.MolFromSmiles(mol_smiles)
+			fp1 = AllChem.GetMorganFingerprintAsBitVect(
+			    mol, useChirality=True, radius=2, nBits=2048)
+			fp_list = list(np.array(fp1).astype(float))
+			fp_str = list(map(str, fp_list))
+			mol_fp = '\t'.join(fp_str)
+			mol_dict = {}
+			mol_dict['Compound_ID'] = mol_ID
+			mol_dict['Smiles'] = mol_smiles
+			mol_dict['morgan_fp_r2'] = mol_fp
+			mol_info_df = pd.DataFrame(mol_dict, index=[0])
+			mol_info_df = mol_info_df.set_index('Compound_ID')
+			final_return = mol_info_df
+			final_id = mol_ID
+		except Exception as error:
+			print('Something wrong with molecule input string...' + repr(error))
+	return final_return, final_id
+@st.cache(allow_output_mutation=True)
+def act_df_gen_mol_feature(mol_id, prot_id):
+	act_df = pd.DataFrame(
+	    {'Protein_ID': prot_id, 'Compound_ID': mol_id}, index=[0])
+	return act_df
+@st.cache(allow_output_mutation=True)
+def compound_feature_gen_df_input(act_df, comp_df, comp_len=2048, comp_vec='morgan_fp_r2'):
+	act_df = pd.merge(act_df, comp_df, left_on='Compound_ID', right_index=True)
+	comp_feature = np.stack(act_df[comp_vec].map(lambda fp: fp.split("\t")))
+	comp_feature = comp_feature.astype('float')
+	return comp_feature
+@st.cache(allow_output_mutation=True)
+def model_prediction(compound_feature, enz_feature, model):
+    prediction_vals = model.predict([compound_feature, enz_feature])
+    return prediction_vals[0][0]
+# loaded_model = load_modelfile('./../CNN_results/model_final.model')
+# KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')
+# kegg_df = KEGG_compound_read.reset_index()
+def main():
+	graph = tf.compat.v1.get_default_graph()
+	ld_model = tf.keras.models.load_model('./../CNN_results_split_final/Final_model.model')
+	KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')
+	kegg_df = KEGG_compound_read.reset_index()
+    # def img_to_bytes(img_path):
+    #     img_bytes = Path(img_path).read_bytes()
+    #     encoded = base64.b64encode(img_bytes).decode()
+    #     return encoded
+    # # st.title('dGPredictor')
+    # header_html = "<img src='../figures/header.png'>"
+    # st.markdown(
+    #     header_html, unsafe_allow_html=True,
+    # )
+	st.image('./header.png', use_column_width=True)
+	st.subheader('Enzyme-Substrate Activity Predictor ')
+	st.subheader('Enzyme sequence')
+	st.caption('Please follow the input format show in the text box--> id:Sequence')
+	enz_str = st.text_input('', value="A0A4P8WFA8:MTKRVLVTGGAGFLGSHLCERLLSEGHEVICLDNFGSGRRKNIKEFEDHPSFKVNDRDVRISESLPSVDRIYHLASRASPADFTQFPVNIALANTQGTRRLLDQARACDARMVFASTSEVYGDPKVHPQPETYTGNVNIRGARGCYDESKRFGETLTVAYQRKYDVDARTVRIFNTYGPRMRPDDGRVVPTFVTQALRGDDLTIYGDGEQTRSFCYVDDLIEGLISLMRVDNPEHNVYNIGKENERTIKELAYEVLGLTDTESDIVYEPLPEDDPGQRRPDITRAKTELDWEPKISLREGLEDTITYFDN")
+    # url = 'https://www.genome.jp/dbget-bin/www_bget?rn:R00801'
+    # if st.button('KEformat example'):
+    #     webbrowser.open_new_tab(url)
+	st.subheader('Substrate ')
+	st.caption('Please follow the input format show in the text box--> KEGG id or click the checkbox')
+	comp_str = st.text_input('', value="C00149")
+	if st.checkbox('If you are entering smiles string along with KEGG ID'):
+		add_info = st.text_area('Additional information (id: Smiles):', "C00149:O[C@@H](CC([O-])=O)C([O-])=O")
+	else:
+		add_info = ''
+	if st.button("Predict"):
+    # if session_state.button_search:
+# 		st.subheader('Enzyme-Substrate activity score')
+		with st.spinner('Calculating...'):
+			try:
+# 				st.write('I am inside')
+				prot_feature, prot_id = prot_feature_gen_from_str_input(enz_str)
+				if len(add_info) == 0:
+					kegg_id_flag = 1
+					comp_feature, comp_id = mol_feature_gen_from_str_input(comp_str, kegg_id_flag, kegg_df)
+				else:
+					kegg_id_flag = 0
+					comp_feature, comp_id = mol_feature_gen_from_str_input(add_info, kegg_id_flag, kegg_df)
+				act_dataframe = act_df_gen_mol_feature(comp_id, prot_id)
+# 				st.write(act_dataframe)
+				compound_feature = compound_feature_gen_df_input(act_dataframe, comp_feature)
+# 				st.write(compound_feature)
+			except Exception as e:
+				st.write('Error somewhere...' + repr(e))
+# 			st.write(compound_feature)
+# 			st.write(prot_feature)
+# 			keras.backend.clear_session()
+			y = ld_model.predict([compound_feature, prot_feature])
+			subheaderstring = 'EnzRank Score for '+ prot_id +  '-' + comp_id + ' pair:'
+			st.subheader(subheaderstring)
+			st.write(str(y[0][0]))
+if __name__ == '__main__':
+    main()

Streamlit/streamlit-main-2022-10-10-17-10-73.webm ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:491b9f0b51969d6171fc1daccf7de1f97a8a8b616a2d2c13afaf252bf23c753c
+size 7700063

Streamlit/test_run_file.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import pandas as pd
+import numpy as np
+from rdkit import Chem
+from rdkit.Chem import AllChem
+# from rdkit.Chem import Draw
+from rdkit.Chem import rdChemReactions as Reactions
+import tensorflow as tf
+from tensorflow import keras
+from keras.preprocessing import sequence
+from keras.utils import pad_sequences
+import keras
+from keras import backend as K
+from keras.models import load_model
+import argparse
+import h5py
+import pdb
+seq_rdic = ['A', 'I', 'L', 'V', 'F', 'W', 'Y', 'N', 'C', 'Q', 'M','S', 'T', 'D', 'E', 'R', 'H', 'K', 'G', 'P', 'O', 'U', 'X', 'B', 'Z']
+seq_dic = {w: i+1 for i, w in enumerate(seq_rdic)}
+def encodeSeq(seq, seq_dic):
+    if pd.isnull(seq):
+        return [0]
+    else:
+        return [seq_dic[aa] for aa in seq]
+def load_modelfile(model_string):
+	loaded_model = tf.keras.models.load_model(model_string)
+	return loaded_model
+def prot_feature_gen_from_str_input(prot_input_str, prot_len=2500):
+    Prot_ID = prot_input_str.split(':')[0]
+    Prot_seq = prot_input_str.split(':')[1]
+    prot_dataframe = pd.DataFrame(
+        {'Protein_ID': Prot_ID, 'Sequence': Prot_seq}, index=[0])
+    prot_dataframe.set_index('Protein_ID')
+    prot_dataframe["encoded_sequence"] = prot_dataframe.Sequence.map(
+        lambda a: encodeSeq(a, seq_dic))
+    prot_feature = pad_sequences(
+        prot_dataframe["encoded_sequence"].values, prot_len)
+    return prot_feature, Prot_ID
+def mol_feature_gen_from_str_input(mol_str, kegg_id_flag, kegg_df):
+	if kegg_id_flag == 1:
+		KEGG_ID = mol_str
+		kegg_id_loc = kegg_df.index[kegg_df.Compound_ID == KEGG_ID][0]
+		KEGG_ID_info = kegg_df.loc[kegg_id_loc]
+		KEGG_ID_info_df = KEGG_ID_info.to_frame().T.set_index('Compound_ID')
+		final_return = KEGG_ID_info_df
+		final_id = KEGG_ID
+	else:
+		try:
+			mol_ID = mol_str.split(':')[0]
+			mol_smiles = mol_str.split(':')[1]
+			mol = Chem.MolFromSmiles(mol_smiles)
+			fp1 = AllChem.GetMorganFingerprintAsBitVect(
+			    mol, useChirality=True, radius=2, nBits=2048)
+			fp_list = list(np.array(fp1).astype(float))
+			fp_str = list(map(str, fp_list))
+			mol_fp = '\t'.join(fp_str)
+			mol_dict = {}
+			mol_dict['Compound_ID'] = mol_ID
+			mol_dict['Smiles'] = mol_smiles
+			mol_dict['morgan_fp_r2'] = mol_fp
+			mol_info_df = pd.DataFrame(mol_dict, index=[0])
+			mol_info_df.set_index('Compound_ID')
+			final_return = mol_info_df
+			final_id = mol_ID
+		except Exception as error:
+			print('Something wrong with molecule input string...' + repr(error))
+	return final_return, final_id
+def act_df_gen_mol_feature(mol_id, prot_id):
+	act_df = pd.DataFrame(
+	    {'Protein_ID': prot_id, 'Compound_ID': mol_id}, index=[0])
+	return act_df
+def compound_feature_gen_df_input(act_df, comp_df, comp_len=2048, comp_vec='morgan_fp_r2'):
+	act_df = pd.merge(act_df, comp_df, left_on='Compound_ID', right_index=True)
+	comp_feature = np.stack(act_df[comp_vec].map(lambda fp: fp.split("\t")))
+	comp_feature = comp_feature.astype('float')
+	return comp_feature
+def model_prediction(compound_feature, enz_feature, model):
+    prediction_vals = model.predict([compound_feature, enz_feature])
+    return prediction_vals[0][0]
+# loaded_model = load_modelfile('./../CNN_results/model_final.model')
+# KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')
+# kegg_df = KEGG_compound_read.reset_index()
+def main():
+	loaded_model = load_modelfile('./../CNN_results_split_final/Final_model.model')
+	KEGG_compound_read = pd.read_csv('./../CNN_data/Final_test/kegg_compound.csv', index_col = 'Compound_ID')
+	kegg_df = KEGG_compound_read.reset_index()
+	# print(loaded_model.summary())
+    # def img_to_bytes(img_path):
+    #     img_bytes = Path(img_path).read_bytes()
+    #     encoded = base64.b64encode(img_bytes).decode()
+    #     return encoded
+    # # st.title('dGPredictor')
+    # header_html = "<img src='../figures/header.png'>"
+    # st.markdown(
+    #     header_html, unsafe_allow_html=True,
+    # )
+	enz_str ="A0A4P8WFA8:MTKRVLVTGGAGFLGSHLCERLLSEGHEVICLDNFGSGRRKNIKEFEDHPSFKVNDRDVRISESLPSVDRIYHLASRASPADFTQFPVNIALANTQGTRRLLDQARACDARMVFASTSEVYGDPKVHPQPETYTGNVNIRGARGCYDESKRFGETLTVAYQRKYDVDARTVRIFNTYGPRMRPDDGRVVPTFVTQALRGDDLTIYGDGEQTRSFCYVDDLIEGLISLMRVDNPEHNVYNIGKENERTIKELAYEVLGLTDTESDIVYEPLPEDDPGQRRPDITRAKTELDWEPKISLREGLEDTITYFDN"
+	comp_str = "C00149"
+	try:
+		prot_feature, prot_id = prot_feature_gen_from_str_input(enz_str)
+		kegg_id_flag = 1
+		comp_feature, comp_id = mol_feature_gen_from_str_input(comp_str, kegg_id_flag, kegg_df)
+		act_dataframe = act_df_gen_mol_feature(comp_id, prot_id)
+		# pdb.set_trace()
+		compound_feature = compound_feature_gen_df_input(act_dataframe, comp_feature)
+	except Exception as e:
+		print('Error somewhere...' + repr(e))
+	# print(type(compound_feature1))
+	# print(loaded_model.predict([compound_feature1, prot_feature]))
+	EnzRankScore = model_prediction(compound_feature, prot_feature, loaded_model)
+	es = EnzRankScore
+	print('something has happened')
+	print('EnzRank score')
+	print(es)
+	# print(type(es))
+	# print(type(EnzRankScore))
+# 	graph = tf.compat.v1.get_default_graph()
+# 	with graph.as_default():
+# 		y = loaded_model.predict([compound_feature, prot_feature])
+# 	print('-----------')
+# 	print(y)
+# 	print(type(y[0][0]))
+# 	print(y[0][0])
+if __name__ == '__main__':
+    main()