Spaces:

Proddis
/

PBC_Complication_model

Sleeping

App Files Files Community

michalisG committed on Mar 27

Commit

acea606

•

1 Parent(s): 74f37f1

Add application file-1

Browse files

Files changed (2) hide show

app.py +65 -0
config.json +35 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import numpy as np
+import shap
+import streamlit as st
+import pandas as pd
+import joblib
+import matplotlib.pyplot as plt
+#
+from utils.data_processor import DataProcessor
+from utils.model_predictor import ModelPredictor
+from utils.user_input_features_collector import UserInputDataCollector
+model = joblib.load('resources/model.joblib')
+categorical_names = joblib.load('resources/categorical_names.pkl')
+target_labels = joblib.load('resources/target_labels.pkl')
+selected_features = []
+shap_explainer = shap.TreeExplainer(model.named_steps['RandomForestClassifier'])
+data_processor = DataProcessor(model, categorical_names, selected_features)
+predictor = ModelPredictor(model)
+st.write("### Enter Patient Information for Diagnosis Prediction")
+data = UserInputDataCollector.user_input_features()
+user_input = pd.DataFrame(data, index=[0])
+st.write("#### Patient Data")
+st.write(user_input)
+# In your Streamlit app, where you handle the "Predict" button:
+if st.button("Predict"):
+    prediction, probabilities = predictor.predict(user_input)
+    col1, col2 = st.columns(2)
+    labels_map = {0: "Transplant/Death", 1: "Survive"}
+    label = labels_map.get(int(np.argmax(probabilities)))
+    # with col1:
+    #     st.subheader("Prediction")
+    #     st.write(label)
+    #
+    # with col2:
+    st.subheader("Prediction Probabilities")
+    # Create a DataFrame for the probabilities to display them in a more readable format
+    proba_df = pd.DataFrame(probabilities, columns=labels_map.values())
+    st.dataframe(proba_df)  # Using st.dataframe to make it more interact
+    i = 0
+    preprocessed_input = data_processor.shap_and_eli5_custom_format(user_input)
+    shap_values = shap_explainer.shap_values(preprocessed_input)
+    # np.argmax(probabilities)
+    shap_explanation = shap.Explanation(values=shap_values[np.argmax(probabilities)][0, :],
+                                        base_values=shap_explainer.expected_value[np.argmax(probabilities)],
+                                        data=user_input.iloc[0, :],
+                                        feature_names=user_input.columns.tolist())
+    # Generate the SHAP waterfall plot
+    shap.plots.waterfall(shap_explanation, max_display=len(user_input.columns.tolist()), show=False)
+    # After generating the SHAP plot, grab the current figure
+    fig = plt.gcf()
+    fig.set_size_inches(10, 7, forward=True)
+    # Optionally, adjust the plot title or other properties here
+    fig.suptitle(f'Prediction: {label}', fontsize=20, y=1.05)
+    # Display the figure in Streamlit, passing it explicitly to ensure thread safety
+    st.pyplot(fig)
+    # Reset the default plot size if necessary
+    plt.rcParams['figure.figsize'] = plt.rcParamsDefault['figure.figsize']

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+    "model_type": "RandomForestClassifier",
+    "expected_features": [
+        "age",
+        "sex",
+        "serum_bilirubin",
+        "serum_cholesterol",
+        "albumin",
+        "alkaline_phosphatase",
+        "SGOT",
+        "platelets",
+        "prothrombin_time"
+    ],
+    "categorical_features": [
+        "drug",
+        "sex",
+        "presence_of_ascites",
+        "presence_of_hepatomegaly",
+        "presence_of_spiders",
+        "presence_of_edema"
+    ],
+    "model_parameters": {
+        "criterion": "entropy",
+        "max_features": 0.1,
+        "min_samples_split": 8,
+        "min_samples_leaf": 6,
+        "bootstrap": true
+    },
+    "version": "1.0",
+    "preprocessing": {
+        "numerical": "median imputation and scaling",
+        "categorical": "one-hot encoding",
+        "ordinal": "label encoding"
+    }
+}