Spaces:
Sleeping
Sleeping
Commit
•
95b3113
1
Parent(s):
9c908f4
Refactor the initialization of molecular feature matrix
Browse files
app.py
CHANGED
@@ -5,18 +5,12 @@ from io import StringIO
|
|
5 |
import joblib
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
-
|
9 |
# page set up
|
10 |
import streamlit as st
|
11 |
from b3clf.descriptor_padel import compute_descriptors
|
12 |
from b3clf.geometry_opt import geometry_optimize
|
13 |
-
from b3clf.utils import (
|
14 |
-
|
15 |
-
predict_permeability,
|
16 |
-
scale_descriptors,
|
17 |
-
select_descriptors,
|
18 |
-
)
|
19 |
-
|
20 |
# from PIL import Image
|
21 |
from streamlit_extras.let_it_rain import rain
|
22 |
from streamlit_ketcher import st_ketcher
|
@@ -54,47 +48,56 @@ resample_methods_dict = {
|
|
54 |
pandas_display_options = {
|
55 |
"line_limit": 50,
|
56 |
}
|
|
|
|
|
57 |
|
58 |
|
59 |
# @st.cache_resource
|
60 |
def generate_predictions(
|
61 |
-
input_fname: str,
|
62 |
sep: str = "\s+|\t+",
|
63 |
clf: str = "xgb",
|
64 |
sampling: str = "classic_ADASYN",
|
65 |
time_per_mol: int = 120,
|
|
|
|
|
66 |
):
|
67 |
"""
|
68 |
Generate predictions for a given input file.
|
69 |
"""
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
89 |
|
90 |
-
|
91 |
-
|
92 |
|
93 |
-
|
94 |
-
|
95 |
|
96 |
-
|
97 |
-
|
|
|
98 |
|
99 |
# Get classifier
|
100 |
# clf = get_clf(clf_str=clf, sampling_str=sampling)
|
@@ -103,7 +106,7 @@ def generate_predictions(
|
|
103 |
result_df = predict_permeability(
|
104 |
clf_str=clf,
|
105 |
sampling_str=sampling,
|
106 |
-
|
107 |
info_df=info_df,
|
108 |
threshold="none",
|
109 |
)
|
@@ -120,9 +123,7 @@ def generate_predictions(
|
|
120 |
[col for col in result_df.columns.to_list() if col in display_cols]
|
121 |
]
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
return X_features, result_df
|
126 |
|
127 |
|
128 |
# Create the Streamlit app
|
@@ -207,7 +208,7 @@ with upload_column:
|
|
207 |
# upload file column
|
208 |
with upload_col:
|
209 |
file = st.file_uploader(
|
210 |
-
label="Upload a CSV, SDF or
|
211 |
type=["csv", "sdf", "txt", "smi"],
|
212 |
help="Input molecule file only supports *.csv, *.sdf, *.txt and *.smi.",
|
213 |
accept_multiple_files=False,
|
@@ -264,24 +265,26 @@ if submit_job_button:
|
|
264 |
# Save the uploaded file to the temporary file path
|
265 |
with open(temp_file_path, "wb") as temp_file:
|
266 |
temp_file.write(file.read())
|
267 |
-
#
|
268 |
-
|
269 |
input_fname=temp_file_path,
|
270 |
sep="\s+|\t+",
|
271 |
clf=classifiers_dict[classifier],
|
272 |
sampling=resample_methods_dict[resampler],
|
273 |
time_per_mol=120,
|
|
|
|
|
274 |
)
|
275 |
|
276 |
# feature table
|
277 |
with feature_column:
|
278 |
selected_feature_rows = np.min(
|
279 |
-
[
|
280 |
)
|
281 |
-
st.dataframe(
|
282 |
-
# placeholder_features.dataframe(
|
283 |
feature_file_name = file.name.split(".")[0] + "_b3clf_features.csv"
|
284 |
-
features_csv =
|
285 |
st.download_button(
|
286 |
"Download features as CSV",
|
287 |
data=features_csv,
|
|
|
5 |
import joblib
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
|
|
8 |
# page set up
|
9 |
import streamlit as st
|
10 |
from b3clf.descriptor_padel import compute_descriptors
|
11 |
from b3clf.geometry_opt import geometry_optimize
|
12 |
+
from b3clf.utils import (get_descriptors, predict_permeability,
|
13 |
+
scale_descriptors, select_descriptors)
|
|
|
|
|
|
|
|
|
|
|
14 |
# from PIL import Image
|
15 |
from streamlit_extras.let_it_rain import rain
|
16 |
from streamlit_ketcher import st_ketcher
|
|
|
48 |
pandas_display_options = {
|
49 |
"line_limit": 50,
|
50 |
}
|
51 |
+
mol_features = None
|
52 |
+
info_df = None
|
53 |
|
54 |
|
55 |
# @st.cache_resource
|
56 |
def generate_predictions(
|
57 |
+
input_fname: str = None,
|
58 |
sep: str = "\s+|\t+",
|
59 |
clf: str = "xgb",
|
60 |
sampling: str = "classic_ADASYN",
|
61 |
time_per_mol: int = 120,
|
62 |
+
mol_features: pd.DataFrame = None,
|
63 |
+
info_df: pd.DataFrame = None,
|
64 |
):
|
65 |
"""
|
66 |
Generate predictions for a given input file.
|
67 |
"""
|
68 |
+
if mol_features is None and info_df is None:
|
69 |
+
# mol_tag = os.path.splitext(uploaded_file.name)[0]
|
70 |
+
# uploaded_file = uploaded_file.read().decode("utf-8")
|
71 |
+
mol_tag = os.path.basename(input_fname).split(".")[0]
|
72 |
+
internal_sdf = f"{mol_tag}_optimized_3d.sdf"
|
73 |
+
|
74 |
+
# Geometry optimization
|
75 |
+
# Input:
|
76 |
+
# * Either an SDF file with molecular geometries or a text file with SMILES strings
|
77 |
+
|
78 |
+
geometry_optimize(input_fname=input_fname, output_sdf=internal_sdf, sep=sep)
|
79 |
+
|
80 |
+
df_features = compute_descriptors(
|
81 |
+
sdf_file=internal_sdf,
|
82 |
+
excel_out=None,
|
83 |
+
output_csv=None,
|
84 |
+
timeout=None,
|
85 |
+
time_per_molecule=time_per_mol,
|
86 |
+
)
|
87 |
+
# st.write(df_features)
|
88 |
+
|
89 |
+
# Get computed descriptors
|
90 |
+
mol_features, info_df = get_descriptors(df=df_features)
|
91 |
|
92 |
+
# Select descriptors
|
93 |
+
mol_features = select_descriptors(df=mol_features)
|
94 |
|
95 |
+
# Scale descriptors
|
96 |
+
mol_features.iloc[:, :] = scale_descriptors(df=mol_features)
|
97 |
|
98 |
+
# this is problematic when the same file is reused for another calculation
|
99 |
+
if os.path.exists(internal_sdf) and keep_sdf == "no":
|
100 |
+
os.remove(internal_sdf)
|
101 |
|
102 |
# Get classifier
|
103 |
# clf = get_clf(clf_str=clf, sampling_str=sampling)
|
|
|
106 |
result_df = predict_permeability(
|
107 |
clf_str=clf,
|
108 |
sampling_str=sampling,
|
109 |
+
mol_features=mol_features,
|
110 |
info_df=info_df,
|
111 |
threshold="none",
|
112 |
)
|
|
|
123 |
[col for col in result_df.columns.to_list() if col in display_cols]
|
124 |
]
|
125 |
|
126 |
+
return mol_features, info_df, result_df
|
|
|
|
|
127 |
|
128 |
|
129 |
# Create the Streamlit app
|
|
|
208 |
# upload file column
|
209 |
with upload_col:
|
210 |
file = st.file_uploader(
|
211 |
+
label="Upload a CSV, SDF, TXT or SMI file",
|
212 |
type=["csv", "sdf", "txt", "smi"],
|
213 |
help="Input molecule file only supports *.csv, *.sdf, *.txt and *.smi.",
|
214 |
accept_multiple_files=False,
|
|
|
265 |
# Save the uploaded file to the temporary file path
|
266 |
with open(temp_file_path, "wb") as temp_file:
|
267 |
temp_file.write(file.read())
|
268 |
+
# mol_features, results = generate_predictions(temp_file_path)
|
269 |
+
mol_features, info_df, results = generate_predictions(
|
270 |
input_fname=temp_file_path,
|
271 |
sep="\s+|\t+",
|
272 |
clf=classifiers_dict[classifier],
|
273 |
sampling=resample_methods_dict[resampler],
|
274 |
time_per_mol=120,
|
275 |
+
mol_features=mol_features,
|
276 |
+
info_df=info_df,
|
277 |
)
|
278 |
|
279 |
# feature table
|
280 |
with feature_column:
|
281 |
selected_feature_rows = np.min(
|
282 |
+
[mol_features.shape[0], pandas_display_options["line_limit"]]
|
283 |
)
|
284 |
+
st.dataframe(mol_features.iloc[:selected_feature_rows, :], hide_index=False)
|
285 |
+
# placeholder_features.dataframe(mol_features, hide_index=False)
|
286 |
feature_file_name = file.name.split(".")[0] + "_b3clf_features.csv"
|
287 |
+
features_csv = mol_features.to_csv(index=True)
|
288 |
st.download_button(
|
289 |
"Download features as CSV",
|
290 |
data=features_csv,
|