legend1234 committed on
Commit
5bd9791
•
1 Parent(s): ed190ed

Reformat the layout

Browse files
app.py CHANGED
@@ -5,12 +5,17 @@ from io import StringIO
5
  import joblib
6
  import numpy as np
7
  import pandas as pd
 
8
  # page set up
9
  import streamlit as st
10
  from b3clf.descriptor_padel import compute_descriptors
11
  from b3clf.geometry_opt import geometry_optimize
12
- from b3clf.utils import (get_descriptors, predict_permeability,
13
- scale_descriptors, select_descriptors)
 
 
 
 
14
  from streamlit_ketcher import st_ketcher
15
 
16
  st.set_page_config(
@@ -54,10 +59,24 @@ scaler = joblib.load("pre_trained/b3clf_scaler.joblib")
54
 
55
  keep_features = "no"
56
  keep_sdf = "no"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  def generate_predictions(
60
- uploaded_file: st.file_uploader,
61
  sep: str = "\s+|\t+",
62
  clf: str = "xgb",
63
  sampling: str = "classic_ADASYN",
@@ -68,14 +87,14 @@ def generate_predictions(
68
  """
69
  # mol_tag = os.path.splitext(uploaded_file.name)[0]
70
  # uploaded_file = uploaded_file.read().decode("utf-8")
71
- mol_tag = os.path.basename(uploaded_file).split(".")[0]
72
  internal_sdf = f"{mol_tag}_optimized_3d.sdf"
73
 
74
  # Geometry optimization
75
  # Input:
76
  # * Either an SDF file with molecular geometries or a text file with SMILES strings
77
 
78
- geometry_optimize(input_fname=uploaded_file, output_sdf=internal_sdf, sep=sep)
79
 
80
  df_features = compute_descriptors(
81
  sdf_file=internal_sdf,
@@ -132,12 +151,55 @@ info_column, upload_column = st.columns(2)
132
 
133
  with upload_column:
134
  st.subheader("Molecule Input")
135
- file = st.file_uploader(
136
- label="Upload a CSV, SDF or TXT file",
137
- type=["csv", "sdf", "txt"],
138
- help="Input molecule file and only text files are supported.",
139
- # accept_multiple_files=False,
140
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  # st.write("The content of the file will be displayed below once uploaded.")
142
  # if file:
143
  # if "csv" in file.name or "txt" in file.name:
@@ -156,12 +218,13 @@ with info_column:
156
  feature_column, prediction_column = st.columns(2)
157
  with feature_column:
158
  st.subheader("Features")
 
159
  placeholder_features = st.empty()
160
  # placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
161
  # columns=["ID", "nAcid", "ALogP", "Alogp2",
162
  # "AMR", "naAromAtom", "nH", "nN"])
163
  # st.dataframe(placeholder_features)
164
- placeholder_features.text("molecular features")
165
 
166
  with prediction_column:
167
  st.subheader("Predictions")
@@ -177,7 +240,14 @@ if file:
177
  # Save the uploaded file to the temporary file path
178
  with open(temp_file_path, "wb") as temp_file:
179
  temp_file.write(file.read())
180
- X_features, results = generate_predictions(temp_file_path)
 
 
 
 
 
 
 
181
 
182
  # feature table
183
  with feature_column:
 
5
  import joblib
6
  import numpy as np
7
  import pandas as pd
8
+
9
  # page set up
10
  import streamlit as st
11
  from b3clf.descriptor_padel import compute_descriptors
12
  from b3clf.geometry_opt import geometry_optimize
13
+ from b3clf.utils import (
14
+ get_descriptors,
15
+ predict_permeability,
16
+ scale_descriptors,
17
+ select_descriptors,
18
+ )
19
  from streamlit_ketcher import st_ketcher
20
 
21
  st.set_page_config(
 
59
 
60
  keep_features = "no"
61
  keep_sdf = "no"
62
+ classifiers_dict = {
63
+ "decision trees": "dtree",
64
+ "kNN": "knn",
65
+ "logsistical regression": "logreg",
66
+ "XGBoost": "xgb",
67
+ }
68
+ resample_methods_dict = {
69
+ "random undersampling": "classic_RandUndersampling",
70
+ "SMOTE": "classic_SMOTE",
71
+ "Borderline SMOTE": "borderline_SMOTE",
72
+ "k-means SMOTE": "kmeans_SMOTE",
73
+ "ADASYN": "classic_ADASYN",
74
+ "no resampling": "common",
75
+ }
76
 
77
 
78
  def generate_predictions(
79
+ input_fname: str,
80
  sep: str = "\s+|\t+",
81
  clf: str = "xgb",
82
  sampling: str = "classic_ADASYN",
 
87
  """
88
  # mol_tag = os.path.splitext(uploaded_file.name)[0]
89
  # uploaded_file = uploaded_file.read().decode("utf-8")
90
+ mol_tag = os.path.basename(input_fname).split(".")[0]
91
  internal_sdf = f"{mol_tag}_optimized_3d.sdf"
92
 
93
  # Geometry optimization
94
  # Input:
95
  # * Either an SDF file with molecular geometries or a text file with SMILES strings
96
 
97
+ geometry_optimize(input_fname=input_fname, output_sdf=internal_sdf, sep=sep)
98
 
99
  df_features = compute_descriptors(
100
  sdf_file=internal_sdf,
 
151
 
152
  with upload_column:
153
  st.subheader("Molecule Input")
154
+ with st.container():
155
+ # uneven columns
156
+ # st.columns((2, 1, 1, 1))
157
+ # two subcolumns for sample input files
158
+ sample_sdf_column, classifier_col = st.columns(2)
159
+ with sample_sdf_column:
160
+ # download sample sdf
161
+ with open("sample_input.sdf", "r") as file_sdf:
162
+ btn = st.download_button(
163
+ label="Download SDF sample file",
164
+ data=file_sdf,
165
+ file_name="sample_input.sdf",
166
+ )
167
+ with classifier_col:
168
+ classifier = st.selectbox(
169
+ label="Classification algorithm:",
170
+ options=("XGBoost", "kNN", "decision trees", "logsistical regression"),
171
+ )
172
+
173
+ sample_smiles_column, resampler_col = st.columns(2)
174
+ with sample_smiles_column:
175
+ # download sample smiles
176
+ with open("sample_input_smiles.csv", "r") as file_smi:
177
+ btn = st.download_button(
178
+ label="Download SMILES sample file",
179
+ data=file_smi,
180
+ file_name="sample_input_smiles.csv",
181
+ )
182
+ with resampler_col:
183
+ resampler = st.selectbox(
184
+ label="Resampling method:",
185
+ options=(
186
+ "ADASYN",
187
+ "random undersampling",
188
+ "Borderline SMOTE",
189
+ "k-means SMOTE",
190
+ "SMOTE",
191
+ "no resampling",
192
+ ),
193
+ )
194
+
195
+ # horizontal line
196
+ st.divider()
197
+ file = st.file_uploader(
198
+ label="Upload a CSV, SDF or TXT file",
199
+ type=["csv", "sdf", "txt"],
200
+ help="Input molecule file and only text files are supported.",
201
+ # accept_multiple_files=False,
202
+ )
203
  # st.write("The content of the file will be displayed below once uploaded.")
204
  # if file:
205
  # if "csv" in file.name or "txt" in file.name:
 
218
  feature_column, prediction_column = st.columns(2)
219
  with feature_column:
220
  st.subheader("Features")
221
+
222
  placeholder_features = st.empty()
223
  # placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
224
  # columns=["ID", "nAcid", "ALogP", "Alogp2",
225
  # "AMR", "naAromAtom", "nH", "nN"])
226
  # st.dataframe(placeholder_features)
227
+ # placeholder_features.text("molecular features")
228
 
229
  with prediction_column:
230
  st.subheader("Predictions")
 
240
  # Save the uploaded file to the temporary file path
241
  with open(temp_file_path, "wb") as temp_file:
242
  temp_file.write(file.read())
243
+ # X_features, results = generate_predictions(temp_file_path)
244
+ X_features, results = generate_predictions(
245
+ input_fname=temp_file_path,
246
+ sep="\s+|\t+",
247
+ clf=classifiers_dict[classifier],
248
+ sampling=resample_methods_dict[resampler],
249
+ time_per_mol=120,
250
+ )
251
 
252
  # feature table
253
  with feature_column:
test_input_sdf.sdf → sample_input.sdf RENAMED
File without changes
test_SMILES.csv → sample_input_smiles.csv RENAMED
File without changes