Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| from PIL import Image | |
| import webbrowser | |
| import json | |
| import pickle | |
| import sys | |
| import joblib | |
| import sys | |
| import os | |
| try: | |
| from openbabel import openbabel | |
| except: | |
| print ("openbabel not found, Installing openbabel ") | |
| sys.path.append('/home/user/app/local/') | |
| s2 = os.system("wget 'https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh' && bash Miniconda3-4.5.4-Linux-x86_64.sh -bfp /home/user/app/local") | |
| s1 = os.system("/home/user/app/local/bin/conda install -c conda-forge openbabel -y") | |
| print(s2) | |
| print(s1) | |
| from openbabel import openbabel | |
| sys.path.append('./CC/') | |
| import chemaxon | |
| from chemaxon import * | |
| from compound import Compound | |
| from compound_cacher import CompoundCacher | |
| from rdkit.Chem import rdChemReactions as Reactions | |
| from rdkit.Chem import Draw | |
| from rdkit import Chem | |
| def load_smiles(): | |
| db = pd.read_csv('./data/cache_compounds_20160818.csv', | |
| index_col='compound_id') | |
| db_smiles = db['smiles_pH7'].to_dict() | |
| return db_smiles | |
| def load_molsig_rad1(): | |
| molecular_signature_r1 = json.load(open('./data/decompose_vector_ac.json')) | |
| return molecular_signature_r1 | |
| def load_molsig_rad2(): | |
| molecular_signature_r2 = json.load( | |
| open('./data/decompose_vector_ac_r2_py3_indent_modified_manual.json')) | |
| return molecular_signature_r2 | |
| def load_model(): | |
| filename = './model/M12_model_BR.pkl' | |
| loaded_model = joblib.load(open(filename, 'rb')) | |
| return loaded_model | |
| def load_compound_cache(): | |
| ccache = CompoundCacher() | |
| return ccache | |
| def count_substructures(radius, molecule): | |
| """Helper function for get the information of molecular signature of a | |
| metabolite. The relaxed signature requires the number of each substructure | |
| to construct a matrix for each molecule. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| molecule : Molecule | |
| a molecule object create by RDkit (e.g. Chem.MolFromInchi(inchi_code) | |
| or Chem.MolToSmiles(smiles_code)) | |
| Returns | |
| ------- | |
| dict | |
| dictionary of molecular signature for a molecule, | |
| {smiles: molecular_signature} | |
| """ | |
| m = molecule | |
| smi_count = dict() | |
| atomList = [atom for atom in m.GetAtoms()] | |
| for i in range(len(atomList)): | |
| env = Chem.FindAtomEnvironmentOfRadiusN(m, radius, i) | |
| atoms = set() | |
| for bidx in env: | |
| atoms.add(m.GetBondWithIdx(bidx).GetBeginAtomIdx()) | |
| atoms.add(m.GetBondWithIdx(bidx).GetEndAtomIdx()) | |
| # only one atom is in this environment, such as O in H2O | |
| if len(atoms) == 0: | |
| atoms = {i} | |
| smi = Chem.MolFragmentToSmiles(m, atomsToUse=list(atoms), | |
| bondsToUse=env, canonical=True) | |
| if smi in smi_count: | |
| smi_count[smi] = smi_count[smi] + 1 | |
| else: | |
| smi_count[smi] = 1 | |
| return smi_count | |
| def decompse_novel_mets_rad1(novel_smiles, radius=1): | |
| decompose_vector = dict() | |
| for cid, smiles_pH7 in novel_smiles.items(): | |
| mol = Chem.MolFromSmiles(smiles_pH7) | |
| mol = Chem.RemoveHs(mol) | |
| # Chem.RemoveStereochemistry(mol) | |
| smi_count = count_substructures(radius, mol) | |
| decompose_vector[cid] = smi_count | |
| return decompose_vector | |
| def decompse_novel_mets_rad2(novel_smiles, radius=2): | |
| decompose_vector = dict() | |
| for cid, smiles_pH7 in novel_smiles.items(): | |
| mol = Chem.MolFromSmiles(smiles_pH7) | |
| mol = Chem.RemoveHs(mol) | |
| # Chem.RemoveStereochemistry(mol) | |
| smi_count = count_substructures(radius, mol) | |
| decompose_vector[cid] = smi_count | |
| return decompose_vector | |
| # def parse_rule(rxn,df_rule): | |
| # df = df_rule | |
| # rule_df = df[rxn].to_frame() | |
| # # new_df = rule_df[(rule_df.T != 0).any()] | |
| # return rule_df[(rule_df.T != 0).any()] | |
| def parse_reaction_formula_side(s): | |
| """ | |
| Parses the side formula, e.g. '2 C00001 + C00002 + 3 C00003' | |
| Ignores stoichiometry. | |
| Returns: | |
| The set of CIDs. | |
| """ | |
| if s.strip() == "null": | |
| return {} | |
| compound_bag = {} | |
| for member in re.split('\s+\+\s+', s): | |
| tokens = member.split(None, 1) | |
| if len(tokens) == 0: | |
| continue | |
| if len(tokens) == 1: | |
| amount = 1 | |
| key = member | |
| else: | |
| amount = float(tokens[0]) | |
| key = tokens[1] | |
| compound_bag[key] = compound_bag.get(key, 0) + amount | |
| return compound_bag | |
| def parse_formula(formula, arrow='<=>', rid=None): | |
| """ | |
| Parses a two-sided formula such as: 2 C00001 => C00002 + C00003 | |
| Return: | |
| The set of substrates, products and the direction of the reaction | |
| """ | |
| tokens = formula.split(arrow) | |
| if len(tokens) < 2: | |
| print(('Reaction does not contain the arrow sign (%s): %s' | |
| % (arrow, formula))) | |
| if len(tokens) > 2: | |
| print(('Reaction contains more than one arrow sign (%s): %s' | |
| % (arrow, formula))) | |
| left = tokens[0].strip() | |
| right = tokens[1].strip() | |
| sparse_reaction = {} | |
| for cid, count in parse_reaction_formula_side(left).items(): | |
| sparse_reaction[cid] = sparse_reaction.get(cid, 0) - count | |
| for cid, count in parse_reaction_formula_side(right).items(): | |
| sparse_reaction[cid] = sparse_reaction.get(cid, 0) + count | |
| return sparse_reaction | |
| def draw_rxn_figure(rxn_dict, db_smiles, novel_smiles): | |
| # db_smiles = load_smiles() | |
| left = '' | |
| right = '' | |
| for met, stoic in rxn_dict.items(): | |
| if met == "C00080" or met == "C00282": | |
| continue # hydogen is not considered | |
| if stoic > 0: | |
| if met in db_smiles: | |
| right = right + db_smiles[met] + '.' | |
| else: | |
| right = right + novel_smiles[met] + '.' | |
| else: | |
| if met in db_smiles: | |
| left = left + db_smiles[met] + '.' | |
| else: | |
| left = left + novel_smiles[met] + '.' | |
| smarts = left[:-1] + '>>' + right[:-1] | |
| # print smarts | |
| smarts = str(smarts) | |
| rxn = Reactions.ReactionFromSmarts(smarts, useSmiles=True) | |
| return Draw.ReactionToImage(rxn) # , subImgSize=(400, 400)) | |
| # def draw_group_changes(rxn,df_rule): | |
| # df = parse_rule(rxn,df_rule) | |
| # group_dict = df.to_dict()[rxn] | |
| # left = '' | |
| # right = '' | |
| # for smiles,stoic in group_dict.iteritems(): | |
| # if stoic > 0: | |
| # right = right + smiles + '.' | |
| # else: | |
| # left = left + smiles + '.' | |
| # smarts = left[:-1] + '>>' + right[:-1] | |
| # rxn = Reactions.ReactionFromSmarts(smarts, useSmiles=True) | |
| # return Draw.ReactionToImage(rxn) | |
| # def get_rxn_rule(rid): | |
| # reaction_dict = json.load(open('../data/optstoic_v3_Sji_dict.json')) | |
| # molecular_signature = json.load(open('../data/decompose_vector_ac.json')) | |
| # molsigna_df = pd.DataFrame.from_dict(molecular_signature).fillna(0) | |
| # all_mets = molsigna_df.columns.tolist() | |
| # all_mets.append("C00080") | |
| # all_mets.append("C00282") | |
| # rule_df = pd.DataFrame(index=molsigna_df.index) | |
| # info = reaction_dict[rid] | |
| # # skip the reactions with missing metabolites | |
| # mets = info.keys() | |
| # flag = False | |
| # for met in mets: | |
| # if met not in all_mets: | |
| # flag = True | |
| # break | |
| # if flag: | |
| # return None | |
| # rule_df[rid] = 0 | |
| # for met, stoic in info.items(): | |
| # if met == "C00080" or met == "C00282": | |
| # continue # hydogen is zero | |
| # rule_df[rid] += molsigna_df[met] * stoic | |
| # return rule_df | |
| def get_rule(rxn_dict, molsig1, molsig2, novel_decomposed1, novel_decomposed2): | |
| if novel_decomposed1 != None: | |
| for cid in novel_decomposed1: | |
| molsig1[cid] = novel_decomposed1[cid] | |
| if novel_decomposed2 != None: | |
| for cid in novel_decomposed2: | |
| molsig2[cid] = novel_decomposed2[cid] | |
| molsigna_df1 = pd.DataFrame.from_dict(molsig1).fillna(0) | |
| all_mets1 = molsigna_df1.columns.tolist() | |
| all_mets1.append("C00080") | |
| all_mets1.append("C00282") | |
| molsigna_df2 = pd.DataFrame.from_dict(molsig2).fillna(0) | |
| all_mets2 = molsigna_df2.columns.tolist() | |
| all_mets2.append("C00080") | |
| all_mets2.append("C00282") | |
| moieties_r1 = open('./data/group_names_r1.txt') | |
| moieties_r2 = open('./data/group_names_r2_py3_modified_manual.txt') | |
| moie_r1 = moieties_r1.read().splitlines() | |
| moie_r2 = moieties_r2.read().splitlines() | |
| molsigna_df1 = molsigna_df1.reindex(moie_r1) | |
| molsigna_df2 = molsigna_df2.reindex(moie_r2) | |
| rule_df1 = pd.DataFrame(index=molsigna_df1.index) | |
| rule_df2 = pd.DataFrame(index=molsigna_df2.index) | |
| # for rid, value in reaction_dict.items(): | |
| # # skip the reactions with missing metabolites | |
| # mets = value.keys() | |
| # flag = False | |
| # for met in mets: | |
| # if met not in all_mets: | |
| # flag = True | |
| # break | |
| # if flag: continue | |
| rule_df1['change'] = 0 | |
| for met, stoic in rxn_dict.items(): | |
| if met == "C00080" or met == "C00282": | |
| continue # hydogen is zero | |
| rule_df1['change'] += molsigna_df1[met] * stoic | |
| rule_df2['change'] = 0 | |
| for met, stoic in rxn_dict.items(): | |
| if met == "C00080" or met == "C00282": | |
| continue # hydogen is zero | |
| rule_df2['change'] += molsigna_df2[met] * stoic | |
| rule_vec1 = rule_df1.to_numpy().T | |
| rule_vec2 = rule_df2.to_numpy().T | |
| m1, n1 = rule_vec1.shape | |
| m2, n2 = rule_vec2.shape | |
| zeros1 = np.zeros((m1, 44)) | |
| zeros2 = np.zeros((m2, 44)) | |
| X1 = np.concatenate((rule_vec1, zeros1), 1) | |
| X2 = np.concatenate((rule_vec2, zeros2), 1) | |
| rule_comb = np.concatenate((X1, X2), 1) | |
| # rule_df_final = {} | |
| # rule_df_final['rad1'] = rule_df1 | |
| # rule_df_final['rad2'] = rule_df2 | |
| return rule_comb, rule_df1, rule_df2 | |
| def get_ddG0(rxn_dict, pH, I, novel_mets): | |
| ccache = CompoundCacher() | |
| # ddG0 = get_transform_ddG0(rxn_dict, ccache, pH, I, T) | |
| T = 298.15 | |
| ddG0_forward = 0 | |
| for compound_id, coeff in rxn_dict.items(): | |
| if novel_mets != None and compound_id in novel_mets: | |
| comp = novel_mets[compound_id] | |
| else: | |
| comp = ccache.get_compound(compound_id) | |
| ddG0_forward += coeff * comp.transform_pH7(pH, I, T) | |
| return ddG0_forward | |
| def get_dG0(rxn_dict, rid, pH, I, loaded_model, molsig_r1, molsig_r2, novel_decomposed_r1, novel_decomposed_r2, novel_mets): | |
| # rule_df = get_rxn_rule(rid) | |
| rule_comb, rule_df1, rule_df2 = get_rule( | |
| rxn_dict, molsig_r1, molsig_r2, novel_decomposed_r1, novel_decomposed_r2) | |
| X = rule_comb | |
| # X = X.reshape(1,-1) | |
| # pdb.set_trace() | |
| # print(np.shape(X1)) | |
| # print(np.shape(X2)) | |
| # print(np.shape(X)) | |
| ymean, ystd = loaded_model.predict(X, return_std=True) | |
| # print(ymean) | |
| # print(ystd) | |
| result = {} | |
| # result['dG0'] = ymean[0] + get_ddG0(rxn_dict, pH, I) | |
| # result['standard deviation'] = ystd[0] | |
| # result_df = pd.DataFrame([result]) | |
| # result_df.style.hide_index() | |
| # return result_df | |
| return ymean[0] + get_ddG0(rxn_dict, pH, I, novel_mets), ystd[0], rule_df1, rule_df2 | |
| # return ymean[0],ystd[0] | |
| def parse_novel_molecule(add_info): | |
| result = {} | |
| for cid, InChI in add_info.items(): | |
| c = Compound.from_inchi('Test', cid, InChI) | |
| result[cid] = c | |
| return result | |
| def parse_novel_smiles(result): | |
| novel_smiles = {} | |
| for cid, c in result.items(): | |
| smiles = c.smiles_pH7 | |
| novel_smiles[cid] = smiles | |
| return novel_smiles | |
| def main(): | |
| # def img_to_bytes(img_path): | |
| # img_bytes = Path(img_path).read_bytes() | |
| # encoded = base64.b64encode(img_bytes).decode() | |
| # return encoded | |
| # # st.title('dGPredictor') | |
| # header_html = "<img src='../figures/header.png'>" | |
| # st.markdown( | |
| # header_html, unsafe_allow_html=True, | |
| # ) | |
| db_smiles = load_smiles() | |
| molsig_r1 = load_molsig_rad1() | |
| molsig_r2 = load_molsig_rad2() | |
| loaded_model = load_model() | |
| ccache = load_compound_cache() | |
| st.image('./figures/header.png', use_column_width=True) | |
| st.subheader('Reaction (please use KEGG IDs)') | |
| # rxn_str = st.text_input('Reaction using KEGG ids:', value='C16688 + C00001 <=> C00095 + C00092') | |
| rxn_str = st.text_input( | |
| '', value='C01745 + C00004 <=> N00001 + C00003 + C00001') | |
| # rxn_str = st.text_input('', value='C16688 + C00001 <=> C00095 + C00092') | |
| # url = 'https://www.genome.jp/dbget-bin/www_bget?rn:R00801' | |
| # if st.button('KEGG format example'): | |
| # webbrowser.open_new_tab(url) | |
| if st.checkbox('Reaction has metabolites not in KEGG'): | |
| # st.subheader('test') | |
| add_info = st.text_area('Additional information (id: InChI):', | |
| '{"N00001":"InChI=1S/C14H12O/c15-14-8-4-7-13(11-14)10-9-12-5-2-1-3-6-12/h1-11,15H/b10-9+"}') | |
| else: | |
| add_info = '{"None":"None"}' | |
| # session_state = SessionState.get(name="", button_sent=False) | |
| # button_search = st.button("Search") | |
| # if button_search: | |
| # session_state.button_search = True | |
| pH = st.slider('pH', min_value=0.0, max_value=14.0, value=7.0, step=0.1) | |
| I = st.slider('Ionic strength [M]', min_value=0.0, | |
| max_value=0.5, value=0.1, step=0.01) | |
| if st.button("Search"): | |
| # if session_state.button_search: | |
| st.subheader('Reaction Equation') | |
| st.write(rxn_str) | |
| with st.spinner('Searching...'): | |
| try: | |
| novel_mets = parse_novel_molecule(json.loads(add_info)) | |
| novel_smiles = parse_novel_smiles(novel_mets) | |
| novel_decomposed_r1 = decompse_novel_mets_rad1(novel_smiles) | |
| novel_decomposed_r2 = decompse_novel_mets_rad2(novel_smiles) | |
| except Exception as e: | |
| novel_mets = None | |
| novel_smiles = None | |
| novel_decomposed_r1 = None | |
| novel_decomposed_r2 = None | |
| # novel_smiles = json.loads(add_info) | |
| print(novel_smiles) | |
| rxn_dict = parse_formula(rxn_str) | |
| st.image(draw_rxn_figure(rxn_dict, db_smiles, | |
| novel_smiles), use_column_width=True) | |
| # st.text('Group changes:') | |
| # st.write(parse_rule('R03921')) | |
| # st.write(get_rxn_rule('R03921')) | |
| # session_state.calculate = st.button('Start Calculate!') | |
| # if session_state.calculate: | |
| # if st.button('Start Calculate!'): | |
| # st.text('Result:') | |
| st.subheader('Thermodynamics') | |
| with st.spinner('Calculating...'): | |
| mu, std, rule_df1, rule_df2 = get_dG0( | |
| rxn_dict, 'R00801', pH, I, loaded_model, molsig_r1, molsig_r2, novel_decomposed_r1, novel_decomposed_r2, novel_mets) | |
| st.write(r"$\Delta_r G'^{o} = %.2f \pm %.2f \ kJ/mol$" % (mu, std)) | |
| st.text('Group changes:') | |
| st.write(rule_df1[(rule_df1.T != 0).any()]) | |
| st.write(rule_df2[(rule_df2.T != 0).any()]) | |
| if __name__ == '__main__': | |
| main() | |