Alishba Imran committed on
Commit
28dae5c
1 Parent(s): 247d0f2

First model version

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ final_models/rf_final_model.txt filter=lfs diff=lfs merge=lfs -text
36
+ final_models/tf_chp_initial/ckpt-94/ckpt-197.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
3
+
4
+ import warnings
5
+ warnings.filterwarnings("ignore")
6
+
7
+
8
+
9
+ from PIL import Image
10
+ import base64
11
+ import pandas as pd
12
+ import streamlit as st
13
+ import pickle
14
+ from rdkit import Chem
15
+ from rdkit.Chem import AllChem
16
+ from sklearn.ensemble import RandomForestRegressor
17
+
18
+ #For KERAS
19
+ import random
20
+ import numpy as np
21
+ from keras.wrappers.scikit_learn import KerasRegressor
22
+ from sklearn.metrics import mean_squared_error
23
+ import time
24
+
25
+ import numpy
26
+ from sklearn.model_selection import GridSearchCV
27
+
28
+ import tensorflow
29
+ from tensorflow.keras.models import Sequential
30
+ from tensorflow.keras.layers import Dense
31
+ from tensorflow.keras.layers import Dropout
32
+
33
+ #from keras.layers import Dense
34
+ #from keras.layers import Dropout
35
+ # Function to create model, required for KerasRegressor
36
+
37
+
38
+ def create_model(optimizer='RMSprop', learn_rate=0.1, momentum=0.4, activation='sigmoid', dropout_rate=0.0):
39
+
40
+ keras_model = Sequential()
41
+ keras_model.add(Dense(128, input_dim=train_encoded.shape[1], activation=activation))
42
+ keras_model.add(Dropout(dropout_rate))
43
+ keras_model.add(Dense(32, activation=activation))
44
+ keras_model.add(Dropout(dropout_rate))
45
+ keras_model.add(Dense(8,activation=activation))
46
+ keras_model.add(Dropout(dropout_rate))
47
+ keras_model.add(Dense(1,activation='linear'))
48
+ keras_model.summary()
49
+ # Compile model
50
+ keras_model.compile(loss='mean_squared_error', optimizer=optimizer)
51
+
52
+ return keras_model
53
+
54
+
55
+ ######################
56
+ # Custom function
57
+ ######################
58
+ ## Calculate molecular descriptors
59
+
60
+ def get_ecfc(smiles_list, radius=2, nBits=2048, useCounts=True):
61
+ """
62
+ Calculates the ECFP fingerprint for given SMILES list
63
+
64
+ :param smiles_list: List of SMILES
65
+ :type smiles_list: list
66
+ :param radius: The ECFP fingerprint radius.
67
+ :type radius: int
68
+ :param nBits: The number of bits of the fingerprint vector.
69
+ :type nBits: int
70
+ :param useCounts: Use count vector or bit vector.
71
+ :type useCounts: bool
72
+ :returns: The calculated ECFP fingerprints for the given SMILES
73
+ :rtype: Dataframe
74
+ """
75
+
76
+ ecfp_fingerprints=[]
77
+ erroneous_smiles=[]
78
+ for smiles in smiles_list:
79
+ mol=Chem.MolFromSmiles(smiles)
80
+ if mol is None:
81
+ ecfp_fingerprints.append([None]*nBits)
82
+ erroneous_smiles.append(smiles)
83
+ else:
84
+ mol=Chem.AddHs(mol)
85
+ if useCounts:
86
+ ecfp_fingerprints.append(list(AllChem.GetHashedMorganFingerprint(mol, radius, nBits)))
87
+ else:
88
+ ecfp_fingerprints.append(list(AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits).ToBitString()))
89
+
90
+ # Create dataframe of fingerprints
91
+ df_ecfp_fingerprints = pd.DataFrame(data = ecfp_fingerprints, index = smiles_list)
92
+ # Remove erroneous data
93
+ if len(erroneous_smiles)>0:
94
+ print("The following erroneous SMILES have been found in the data:\n{}.\nThe erroneous SMILES will be removed from the data.".format('\n'.join(map(str, erroneous_smiles))))
95
+ df_ecfp_fingerprints = df_ecfp_fingerprints.dropna(how='any')
96
+
97
+ return df_ecfp_fingerprints
98
+
99
+
100
+
101
+
102
+ ## Generate the dataset; it is different from the original one
103
+ import deepchem as dc
104
+ from deepchem.models import GraphConvModel
105
+
106
+ def generate(SMILES, verbose=False):
107
+
108
+ featurizer = dc.feat.ConvMolFeaturizer()
109
+ gcn = featurizer.featurize(SMILES)
110
+ properties = [random.randint(-1,1)/100 for i in range(0,len(SMILES))]
111
+ dataset = dc.data.NumpyDataset(X=gcn, y=np.array(properties))
112
+
113
+ return dataset
114
+
115
+
116
+ ######################
117
+ # Page Title
118
+ ######################
119
+
120
+
121
+
122
+ st.write("""# Accelerated reaction energy prediction for redox batteries 🧪 """)
123
+ st.write('By: [Alishba Imran](https://www.linkedin.com/in/alishba-imran-/)')
124
+
125
+
126
+
127
+
128
+ #%%
129
+ # About PART
130
+
131
+ about_part = st.expander("Learn More About Project", expanded=False)
132
+ with about_part:
133
+ st.write('''
134
+ #### About
135
+ Redox flow batteries (RFB) are widely being explored as a class of electrochemical energy storage devices for large-scale energy storage applications. Redox flow batteries convert electrical energy to chemical energy via electrochemical reactions (through reversible oxidation and reduction) of compounds.
136
+
137
+ To develop next-gen redox flow batteries with high cycle life and energy density, we need to speed up the discovery of electroactive materials with desired properties. This process can currently be very slow and expensive given how large and diverse the chemical space of the candidate compounds is.
138
+
139
+ Using an attention-based graph convolutional neural network technique, I've developed a model that can take in reactants as SMILEs and predict the reaction energy in the redox reaction.
140
+
141
+ A lot of this work was inspired and built on top of the paper [here](https://chemrxiv.org/engage/chemrxiv/article-details/60c7575f469df44a40f45465). Feel free to give it a try and reach out for any feedback. Email: alishbai734@gmail.com.
142
+
143
+
144
+ ''')
145
+
146
+
147
+
148
+
149
+ st.write('**Insert your SMILES**')
150
+
151
+ st.write('Type any SMILES used as a reactant in the redox reaction. This model will output the reaction energy.')
152
+
153
+ ## Read SMILES input
154
+ SMILES_input = "Oc1cccc(c12)c(O)c(nn2)O\nc1cccc(c12)cc(nn2)O\nOc1c(O)ccc(c12)cc(nn2)O"
155
+
156
+ SMILES = st.text_area('press ctrl+enter to run model!', SMILES_input, height=20)
157
+ SMILES = SMILES.split('\n')
158
+ SMILES = list(filter(None, SMILES))
159
+
160
+
161
+
162
+ # st.header('Input SMILES')
163
+ # SMILES[1:] # Skips the dummy first item
164
+
165
+ # Use only top 1000
166
+ if len(SMILES)>1000:
167
+ SMILES=SMILES[0:1000]
168
+
169
+ ## Calculate molecular descriptors
170
+ ecfc_encoder = get_ecfc(SMILES)
171
+
172
+ #Import pretrained models
173
+
174
+ #---------------------------------------------------------------------------------
175
+ ### generate dataset from SMILES and function generate
176
+ generated_dataset = generate(SMILES)
177
+
178
+ ### transformer for gcn
179
+ filename = 'final_models/transformers.pkl'
180
+ infile = open(filename,'rb')
181
+ transformers = pickle.load(infile)
182
+ infile.close()
183
+
184
+
185
+ ## model for gcn
186
+ model_dir = 'final_models/tf_chp_initial'
187
+ gcne_model = dc.models.GraphConvModel(n_tasks=1, batch_size=100, mode='regression', dropout=0.25,model_dir= model_dir,random_seed=0)
188
+ gcne_model.restore('final_models/tf_chp_initial/ckpt-94/ckpt-197')
189
+ #print(gcne_model)
190
+
191
+
192
+ ## predict energy from gcn model
193
+ pred_gcne = gcne_model.predict(generated_dataset, transformers)
194
+
195
+
196
+ #---------------------------------------------------------------------------------
197
+ ##keras model load
198
+ from keras.models import model_from_json
199
+
200
+ keras_final_model = model_from_json(open('./final_models/keras_final_model_architecture.json').read())
201
+ keras_final_model.load_weights('./final_models/keras_final_model_weights.h5')
202
+
203
+ #keras_final_model = pickle.load(open(r'./final_models/keras_final_model.txt', "rb"))
204
+ rf_final_model = pickle.load(open(r'./final_models/rf_final_model.txt', "rb"))
205
+ #xgbm_final_model = pickle.load(open(r'.\final_models\xgbm_final_model.txt', "rb"))
206
+
207
+
208
+
209
+ #predict test data (Keras,RF, GCN)
210
+ pred_keras = keras_final_model.predict(ecfc_encoder)
211
+ pred_rf = rf_final_model.predict(ecfc_encoder)
212
+
213
+ ##reshape (n,) ----> (n,1)
214
+
215
+ pred_rf_r = pred_rf.reshape((len(pred_rf),1))
216
+ #pred_xgb = xgbm_final_model.predict(ecfc_encoder)
217
+
218
+
219
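+ #calculate consensus -- NOTE(review): this uses pred_rf, not the reshaped pred_rf_r; if the other predictions are (n,1), broadcasting would give an (n,n) array -- confirm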
220
+ pred_consensus = (pred_keras + pred_gcne + pred_rf)/3
221
+ # predefined_models.get_errors(test_logS_list,pred_enseble)
222
+
223
+ #%% Weighted
224
+
225
+ #------------------------------------------------------------------------------------------------------------------
226
+
227
+
228
+ #------------------------------------------------------------------------------------------------------------------
229
+
230
+
231
+
232
+
233
+
234
+
235
+ from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
236
+
237
+ ## Test 1 Experiments
238
+
239
+ test1_mae = []
240
+
241
+ test1_mae.append(0.00705) # 0 - GCN
242
+ test1_mae.append(0.00416) # 1 - Keras
243
+ test1_mae.append(0.0035) # 3 - RF
244
+
245
+
246
+
247
+ ## Test 2 Experiments
248
+
249
+ test2_mae = []
250
+
251
+ test2_mae.append(0.00589) # 0 - GCN
252
+ test2_mae.append(0.00483) # 1 - Keras
253
+ test2_mae.append(0.00799) # 3 - RF
254
+
255
+
256
+
257
+ weighted_pred_0_1_3=( np.power(2/(test1_mae[0]+test2_mae[0]),3) * pred_gcne +
258
+ np.power(2/(test1_mae[1]+test2_mae[1]),3) * pred_keras +
259
+ np.power(2/(test1_mae[2]+test2_mae[2]),3) * pred_rf_r ) / (
260
+ np.power(2/(test1_mae[0]+test2_mae[0]),3) + np.power(2/(test1_mae[1]+test2_mae[1]),3) + np.power(2/(test1_mae[2]+test2_mae[2]),3))
261
+
262
+
263
+
264
+ #--------
265
+
266
+ #### ???? array shape not correct and no difference with pred_consensus
267
+
268
+ pred_weighted = (pred_gcne + pred_keras + pred_rf_r)/3
269
+
270
+
271
+
272
+
273
+
274
+
275
+
276
+ #%%
277
+ # results=np.column_stack([pred_mlp,pred_xgb,pred_rf,pred_consensus])
278
+
279
+ df_results = pd.DataFrame(SMILES, columns=['SMILES Reactant'])
280
+ df_results["Predicted Reaction Energy"]= weighted_pred_0_1_3
281
+ #df_results["reaction_energy"]= pred_weighted
282
+ df_results=df_results.round(6)
283
+
284
+ # df_results.to_csv("results/predicted-"+test_data_name+".csv",index=False)
285
+
286
+
287
+ # Results DF
288
+
289
+ st.header('Prediction of Reaction Energy for RFB')
290
+ df_results # Bare expression: presumably displayed via Streamlit "magic"; nothing is skipped here
291
+
292
+
293
+
294
+
295
+
296
+
297
+ # download=st.button('Download Results File')
298
+ # if download:
299
+ csv = df_results.to_csv(index=False)
300
+ b64 = base64.b64encode(csv.encode()).decode() # some strings
301
+
302
+
303
+
environment_ex.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: redpred
2
+ channels:
3
+ - rdkit
4
+ - conda-forge
5
+ - defaults
6
+ dependencies:
7
+ - libgcc=5.2.0
8
+ - rdkit
9
+ - pip
10
+ - pip:
11
+ - streamlit==1.12.0
12
+ - scikit-learn
13
+ - mordred
14
+ - pandas==1.0.3
15
+ - numpy==1.16
16
+ - tensorflow==2.3.2
17
+ - keras==2.4.3
18
+ - deepchem==2.4.0
final_models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
final_models/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+
final_models/keras_final_model_architecture.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 2048], "dtype": "float32", "sparse": false, "ragged": false, "name": "dense_input"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": [null, 2048], "dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 32, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.4.0", "backend": 
"tensorflow"}
final_models/keras_final_model_weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98052060b363a5fe8dc8e9698651328d07676305836a497fdf8a37e45a52f16e
3
+ size 1083600
final_models/rf_final_model.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0909b7349b8d7de88b90a087724d5300f6c96bd11e24dcde2e813ccaa4659d65
3
+ size 75279214
final_models/tf_chp_initial/ckpt-94/checkpoint ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ model_checkpoint_path: "ckpt-197"
2
+ all_model_checkpoint_paths: "ckpt-197"
3
+ all_model_checkpoint_timestamps: 1625564744.3729
4
+ last_preserved_timestamp: 1624939492.862955
final_models/tf_chp_initial/ckpt-94/ckpt-197.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ec8ac25c78a9aae9e2f61edc697a599bd30f4a2e884680097abc227d3222f9
3
+ size 2434854
final_models/tf_chp_initial/ckpt-94/ckpt-197.index ADDED
Binary file (19.1 kB). View file
 
final_models/transformers.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d02fa1565aef26039a3fe8468caae723afddd2f1a5a890fd10a1eab8456e2e
3
+ size 426
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ deepchem
2
+ keras
3
+ numpy
4
+ pandas
5
+ Pillow
6
+ rdkit
7
+ scikit_learn
8
+ streamlit
9
+ tensorflow
transformers.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d02fa1565aef26039a3fe8468caae723afddd2f1a5a890fd10a1eab8456e2e
3
+ size 426