import os
import tempfile
from io import StringIO

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from b3clf.descriptor_padel import compute_descriptors
from b3clf.geometry_opt import geometry_optimize
from b3clf.utils import (
    get_descriptors,
    predict_permeability,
    scale_descriptors,
    select_descriptors,
)

# from PIL import Image
from streamlit_extras.let_it_rain import rain
from streamlit_ketcher import st_ketcher

# page set up
st.set_page_config(
    page_title="BBB Permeability Prediction with Imbalanced Learning",
    # page_icon="🧊",
    layout="wide",
    # initial_sidebar_state="expanded",
    # menu_items={
    #     'Get Help': '',
    #     'Report a bug': "",
    #     'About': "# This is a header. This is an *extremely* cool app!"
    # }
)

keep_features = "no"
keep_sdf = "no"

# Display label -> value forwarded as ``clf`` to generate_predictions().
classifiers_dict = {
    "decision tree": "dtree",
    "kNN": "knn",
    "logistic regression": "logreg",
    "XGBoost": "xgb",
}

# Display label -> value forwarded as ``sampling`` to generate_predictions().
resample_methods_dict = {
    "random undersampling": "classic_RandUndersampling",
    "SMOTE": "classic_SMOTE",
    "Borderline SMOTE": "borderline_SMOTE",
    "k-means SMOTE": "kmeans_SMOTE",
    "ADASYN": "classic_ADASYN",
    "no resampling": "common",
}

# Maximum number of table rows rendered in the page ("line_limit").
pandas_display_options = {
    "line_limit": 50,
}


# @st.cache_resource
def generate_predictions(
    input_fname: str,
    # The backslash is escaped explicitly: a bare "\s" is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+). The resulting string value is
    # unchanged: backslash, "s+|", a literal tab character, "+".
    sep: str = "\\s+|\t+",
    clf: str = "xgb",
    sampling: str = "classic_ADASYN",
    time_per_mol: int = 120,
):
    """Generate molecular features and predictions for a given input file.

    The pipeline is: geometry optimization into an intermediate SDF file,
    descriptor computation from that SDF, descriptor selection and scaling,
    then prediction with the chosen classifier/resampling combination.

    Parameters
    ----------
    input_fname : str
        Path of the input file. Either an SDF file with molecular geometries
        or a text file with SMILES strings.
    sep : str
        Separator forwarded unchanged to ``geometry_optimize``.
    clf : str
        Classifier key forwarded as ``clf_str`` to ``predict_permeability``
        (the values of ``classifiers_dict``).
    sampling : str
        Resampling key forwarded as ``sampling_str`` to
        ``predict_permeability`` (the values of ``resample_methods_dict``).
    time_per_mol : int
        Forwarded as ``time_per_molecule`` to ``compute_descriptors``
        (presumably a per-molecule time limit in seconds -- confirm against
        the b3clf documentation).

    Returns
    -------
    tuple
        ``(X_features, result_df)``: the selected and scaled descriptors, and
        the prediction table restricted to the ID / SMILES / predicted
        probability / predicted label columns that are present.

    Notes
    -----
    The intermediate ``<input basename>_optimized_3d.sdf`` file is written
    relative to the current working directory and is removed before this
    function returns, whether the pipeline succeeds or raises.
    """
    mol_tag = os.path.basename(input_fname).split(".")[0]
    internal_sdf = f"{mol_tag}_optimized_3d.sdf"

    try:
        # Geometry optimization
        # Input:
        # * Either an SDF file with molecular geometries or a text file with
        #   SMILES strings
        geometry_optimize(input_fname=input_fname, output_sdf=internal_sdf, sep=sep)

        df_features = compute_descriptors(
            sdf_file=internal_sdf,
            excel_out=None,
            output_csv=None,
            timeout=None,
            time_per_molecule=time_per_mol,
        )

        # Get computed descriptors
        X_features, info_df = get_descriptors(df=df_features)

        # Select descriptors
        X_features = select_descriptors(df=X_features)

        # Scale descriptors
        X_features = scale_descriptors(df=X_features)

        # Predict with the requested classifier / resampling strategy
        result_df = predict_permeability(
            clf_str=clf,
            sampling_str=sampling,
            features_df=X_features,
            info_df=info_df,
            threshold="none",
        )

        # Keep only the columns shown to the user, in their existing order
        display_cols = [
            "ID",
            "SMILES",
            "B3clf_predicted_probability",
            "B3clf_predicted_label",
        ]
        result_df = result_df[
            [col for col in result_df.columns if col in display_cols]
        ]
    finally:
        # Always clean up the intermediate SDF so a failed run does not leave
        # it behind. If an early failure meant it was never created, there is
        # nothing to remove and the original exception must not be masked.
        try:
            os.remove(internal_sdf)
        except FileNotFoundError:
            pass

    return X_features, result_df


# Create the Streamlit app
st.title(":blue[BBB Permeability Prediction with Imbalanced Learning]")
info_column, upload_column = st.columns(2)

# download sample files
with info_column:
    st.subheader("About `B3clf`")
    # fmt: off
    st.markdown(
        """ `B3clf` is a Python package for predicting the blood-brain barrier (BBB) permeability of small molecules using imbalanced learning. It supports decision tree, XGBoost, kNN, logistical regression and 5 resampling strategies (SMOTE, Borderline SMOTE, k-means SMOTE and ADASYN). The workflow of `B3clf` is summarized as below. The Source code and more details are available at This project is supported by Digital Research Alliance of Canada (originally known as Compute Canada) and NSERC. This project is maintained by QC-Dev comminity. 
For further information and inquiries please contact us at"""
    )
    st.text(" \n")
    # text_body = '''
    # `B3clf` is a Python package for predicting the blood-brain barrier (BBB) permeability of small molecules using imbalanced learning. It supports decision tree, XGBoost, kNN, logistical regression and 5 resampling strategies (SMOTE, Borderline SMOTE, k-means SMOTE and ADASYN). The workflow of `B3clf` is summarized as below. The Source code and more details are available at
    # '''
    # st.markdown(f'


", unsafe_allow_html=True) # st.write("The content of the file will be displayed below once uploaded.") # if file: # if "csv" in or "txt" in # st.write("utf-8")) # st.write(file) feature_column, prediction_column = st.columns(2) with feature_column: st.subheader("Molecular Features") placeholder_features = st.empty() # placeholder_features = pd.DataFrame(index=[1, 2, 3, 4], # columns=["ID", "nAcid", "ALogP", "Alogp2", # "AMR", "naAromAtom", "nH", "nN"]) # st.dataframe(placeholder_features) # placeholder_features.text("molecular features") with prediction_column: st.subheader("Predictions") # placeholder_predictions = st.empty() # placeholder_predictions.text("prediction") # Generate predictions when the user uploads a file if submit_job_button: if file: temp_dir = tempfile.mkdtemp() # Create a temporary file path for the uploaded file temp_file_path = os.path.join(temp_dir, # Save the uploaded file to the temporary file path with open(temp_file_path, "wb") as temp_file: temp_file.write( # X_features, results = generate_predictions(temp_file_path) X_features, results = generate_predictions( input_fname=temp_file_path, sep="\s+|\t+", clf=classifiers_dict[classifier], sampling=resample_methods_dict[resampler], time_per_mol=120, ) # feture table with feature_column: selected_feature_rows = np.min( [X_features.shape[0], pandas_display_options["line_limit"]] ) st.dataframe(X_features.iloc[:selected_feature_rows, :], hide_index=False) # placeholder_features.dataframe(X_features, hide_index=False) feature_file_name =".")[0] + "_b3clf_features.csv" features_csv = X_features.to_csv(index=True) st.download_button( "Download features as CSV", data=features_csv, file_name=feature_file_name, ) # prediction table with prediction_column: # st.subheader("Predictions") if results is not None: # Display the predictions in a table selected_result_rows = np.min( [results.shape[0], pandas_display_options["line_limit"]] ) results_df_display = results.iloc[ :selected_result_rows, : 
].style.format({"B3clf_predicted_probability": "{:.6f}".format}) st.dataframe(results_df_display, hide_index=True) # Add a button to download the predictions as a CSV file predictions_csv = results.to_csv(index=True) results_file_name =".")[0] + "_b3clf_predictions.csv" st.download_button( "Download predictions as CSV", data=predictions_csv, file_name=results_file_name, ) # indicate the success of the job # rain( # emoji="🎈", # font_size=54, # falling_speed=5, # animation_length=10, # ) st.balloons() # hide footer # hide_streamlit_style = """ """ st.markdown(hide_streamlit_style, unsafe_allow_html=True) # add google analytics st.markdown( """ """, unsafe_allow_html=True, )