import itertools as it
import os
import tempfile
from io import StringIO
import joblib
import numpy as np
import pandas as pd
import pkg_resources
# page set up
import streamlit as st
from b3clf.descriptor_padel import compute_descriptors
from b3clf.geometry_opt import geometry_optimize
from b3clf.utils import get_descriptors, scale_descriptors, select_descriptors
# from PIL import Image
from streamlit_extras.let_it_rain import rain
from streamlit_ketcher import st_ketcher
from utils import generate_predictions, load_all_models
# Clear Streamlit's data cache; being top-level, this runs on every script run.
st.cache_data.clear()

# Page-level settings. Only the title and the wide layout are customised; the
# icon, sidebar state and menu items are not passed and keep their defaults.
page_settings = {
    "page_title": "BBB Permeability Prediction with Imbalanced Learning",
    "layout": "wide",
}
st.set_page_config(**page_settings)
# String flags, both "no". They are not read anywhere in the visible part of
# the script; presumably they control whether intermediate feature / SDF files
# are kept -- verify against the code that consumes them.
keep_features = keep_sdf = "no"

# Display name -> identifier passed to generate_predictions as ``clf``.
classifiers_dict = {
    "decision tree": "dtree",
    "kNN": "knn",
    "logistic regression": "logreg",
    "XGBoost": "xgb",
}

# Display name -> identifier passed to generate_predictions as ``sampling``.
resample_methods_dict = {
    "random undersampling": "classic_RandUndersampling",
    "SMOTE": "classic_SMOTE",
    "Borderline SMOTE": "borderline_SMOTE",
    "k-means SMOTE": "kmeans_SMOTE",
    "ADASYN": "classic_ADASYN",
    "no resampling": "common",
}

# ``line_limit`` caps how many rows of each result table are displayed.
pandas_display_options = {"line_limit": 50}

# Per-run results; they stay None until a submitted job fills them in.
mol_features = info_df = results = temp_file_path = None
# Model collection handed to generate_predictions as ``_models_dict``.
all_models = load_all_models()

# Create the Streamlit app
st.title(":blue[BBB Permeability Prediction with Imbalanced Learning]")
info_column, upload_column = st.columns(2)

# Initialize the session-state entries this script reads. Besides the cached
# molecule features and info dataframe, the upload-tracking entries are read
# further down before any visible code assigns them, so they get defaults here
# too. Existing values are never overwritten, which makes this a no-op when
# an entry has already been set.
session_defaults = {
    "mol_features": None,
    "info_df": None,
    "uploaded_file": None,
    "uploaded_file_changed": False,
}
for state_key, default_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value
# "About" panel: a short description of the package shown in the info column.
with info_column:
    st.subheader("About `B3clf`")
    # fmt: off
    # The text is kept flush-left on a single line so Markdown renders it as
    # one paragraph. The list of resampling strategies mirrors the entries of
    # resample_methods_dict (five strategies plus "no resampling").
    st.markdown(
        """
`B3clf` is a Python package for predicting the blood-brain barrier (BBB) permeability of small molecules using imbalanced learning. It supports decision tree, XGBoost, kNN, logistic regression and 5 resampling strategies (random undersampling, SMOTE, Borderline SMOTE, k-means SMOTE and ADASYN). The workflow of `B3clf` is summarized below. The source code and more details are available at https://github.com/theochem/B3clf. This project is supported by Digital Research Alliance of Canada (originally known as Compute Canada) and NSERC. This project is maintained by the QC-Devs community. For further information and inquiries please contact us at qcdevs@gmail.com."""
    )
    # Vertical spacer below the description.
    st.text(" \n")
# st.markdown(f"
",
unsafe_allow_html=True,
)
# Button that triggers a prediction job; its value gates the job-handling code
# further down the script.
submit_job_button = st.button("Submit Job", key="job_button", type="secondary")
# submit_job_col.markdown("
",
# unsafe_allow_html=True)
# submit_job_button = submit_job_col.button(
# label="Submit job", key="submit_job_button", type="secondary"
# )
# submit_job_col.markdown("
", unsafe_allow_html=True)
# st.write("The content of the file will be displayed below once uploaded.")
# if file:
# if "csv" in file.name or "txt" in file.name:
# st.write(file.read().decode("utf-8"))
# st.write(file)
# Two side-by-side columns for the job output: molecular features in the first,
# predictions in the second. The tables themselves are rendered further down,
# once a submitted job has produced results.
feature_column, prediction_column = st.columns(2)
with feature_column:
st.subheader("Molecular Features")
# Empty placeholder element; nothing in the visible script writes to it.
placeholder_features = st.empty()
with prediction_column:
st.subheader("Predictions")
# NOTE(review): debug output that is shown in the UI. It reads
# st.session_state.uploaded_file_changed, so that entry must already exist
# when this line runs -- confirm it is initialised earlier in the script.
st.write(
f"the state of uploaded file changed before checking: {st.session_state.uploaded_file_changed}"
)
# Generate predictions when the user submits a job with an uploaded file.
if submit_job_button:
    if uploaded_file:
        # The upload counts as changed when it differs from the file handled on
        # the previous submission. ``.get`` tolerates the very first run, when
        # no file has been stored in the session yet.
        st.session_state.uploaded_file_changed = bool(
            st.session_state.get("uploaded_file") != uploaded_file
        )
        st.session_state.uploaded_file = uploaded_file
        st.write(
            f"the state of uploaded file changed after checking: {st.session_state.uploaded_file_changed}"
        )

        # Recompute from the file when it is new, and also when no features are
        # cached (e.g. an earlier run failed after the upload was recorded);
        # otherwise the cached path would be called with nothing to work on.
        needs_featurization = (
            st.session_state.uploaded_file_changed
            or st.session_state.get("mol_features") is None
        )
        if needs_featurization:
            # The directory and the copy of the upload are removed as soon as
            # the predictions are back, so nothing accumulates on disk.
            with tempfile.TemporaryDirectory() as temp_dir:
                # Only the base name of the client-supplied file name is used,
                # so the path cannot point outside the temporary directory.
                temp_file_path = os.path.join(
                    temp_dir, os.path.basename(uploaded_file.name)
                )
                with open(temp_file_path, "wb") as temp_file:
                    # getvalue() returns the whole content regardless of the
                    # current read position of the upload buffer.
                    temp_file.write(uploaded_file.getvalue())
                mol_features, info_df, results = generate_predictions(
                    input_fname=temp_file_path,
                    # Raw string: "\s" is not a valid escape in a plain literal.
                    sep=r"\s+|\t+",
                    clf=classifiers_dict[classifier],
                    _models_dict=all_models,
                    sampling=resample_methods_dict[resampler],
                    time_per_mol=120,
                    mol_features=None,
                    info_df=None,
                )
            # Keep the computed features so a resubmission of the same file can
            # skip the file-based path.
            st.session_state.mol_features = mol_features
            st.session_state.info_df = info_df
        else:
            # Same file as last time: reuse the features stored in the session
            # and only rerun the selected classifier / resampling combination.
            mol_features, info_df, results = generate_predictions(
                input_fname=None,
                sep=r"\s+|\t+",
                clf=classifiers_dict[classifier],
                _models_dict=all_models,
                sampling=resample_methods_dict[resampler],
                time_per_mol=120,
                mol_features=st.session_state.mol_features,
                info_df=st.session_state.info_df,
            )
# Feature table: preview of the molecular features plus a CSV download.
with feature_column:
    if mol_features is not None:
        # Only the first ``line_limit`` rows are displayed; the download below
        # always contains the full table.
        st.dataframe(
            mol_features.head(pandas_display_options["line_limit"]),
            hide_index=False,
        )
        # splitext drops only the final extension, so a name such as
        # "my.molecules.sdf" keeps its full stem instead of collapsing to "my".
        upload_stem = os.path.splitext(os.path.basename(uploaded_file.name))[0]
        st.download_button(
            "Download features as CSV",
            data=mol_features.to_csv(index=True),
            file_name=upload_stem + "_b3clf_features.csv",
        )

# Prediction table: preview of the predictions plus a CSV download.
with prediction_column:
    if results is not None:
        # Display the first ``line_limit`` predictions with the probability
        # column formatted to six decimals.
        results_df_display = results.head(
            pandas_display_options["line_limit"]
        ).style.format({"B3clf_predicted_probability": "{:.6f}".format})
        st.dataframe(results_df_display, hide_index=True)

        # Add a button to download the predictions as a CSV file
        upload_stem = os.path.splitext(os.path.basename(uploaded_file.name))[0]
        st.download_button(
            "Download predictions as CSV",
            data=results.to_csv(index=True),
            file_name=upload_stem + "_b3clf_predictions.csv",
        )

        # Indicate the success of the job; shown only when predictions exist.
        st.balloons()
# hide footer
# https://github.com/streamlit/streamlit/issues/892
# NOTE(review): the literal below holds only a newline, so this call injects no
# styling -- the CSS it presumably carried appears to be missing; confirm.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# add google analytics
# NOTE(review): this literal is likewise just a newline, so no analytics
# snippet is emitted -- confirm whether the markup was lost.
st.markdown(
"""
""",
unsafe_allow_html=True,
)