# Bankruptcy-prediction web app (Gradio): loads a fitted scaler and model,
# then serves either a single-record form or a multi-record Pickle upload.
import gradio as gr
import pandas as pd
import joblib
import numpy as np
import shap
import matplotlib.pyplot as plt

# Path of the fitted scaler to load; change to test a different scaler.
PATH_SCALER = "standard_scaler_1.sav"
# Path of the trained model to load; change to test a different model.
PATH_MODEL = "random_forest_1.sav"
# Interface mode: "Single" renders a form for one manually entered record;
# any other value accepts a Pickle file with multiple records.
type_interface = "Single"

# NOTE(review): joblib.load deserializes with pickle and can execute arbitrary
# code — only load model/scaler files from a trusted source.
loaded_scaler = joblib.load(PATH_SCALER)
loaded_model = joblib.load(PATH_MODEL)
# Feature names attached to the model at training time.
# NOTE(review): `feature_names` is not a standard sklearn attribute — it is
# presumably set by the training script; verify against that script.
feature_names = loaded_model.feature_names
# Define what to do with data given from the user form | |
def classify_company(*feature_values): | |
# Create Pandas dataframe | |
x = pd.DataFrame([feature_values], columns=loaded_model.feature_names, dtype=float) | |
# Scale it accordingly with the used scaler during the training phase | |
scaled_x = loaded_scaler.transform(x) | |
# Obtain prediction from model | |
prediction = loaded_model.predict_proba(scaled_x)[0] | |
prediction_dict = {"Active": float(prediction[0]), "Bankruptcy": float(prediction[1])} | |
# Create SHAP explainer | |
explainer = shap.TreeExplainer(loaded_model) | |
shap_values = explainer.shap_values(scaled_x) | |
# Obtain SHAP values plot | |
shap.force_plot( | |
base_value=explainer.expected_value[0], | |
shap_values=shap_values[0], | |
features=x, | |
feature_names=loaded_model.feature_names, | |
out_names=["Active", "Bankruptcy"], | |
matplotlib=True, | |
figsize=(30,7), | |
show=False, | |
) | |
# Create figure | |
plt.tight_layout() | |
fig = plt.gcf() | |
plt.close() | |
# Return prediction values and shap plot | |
return prediction_dict, fig | |
# Define what to do with data given from the Pickle file | |
def classify_companies(file): | |
# Read file as Pickle | |
input_dataset = pd.read_pickle(file.name) | |
input_dataset = input_dataset[feature_names] | |
# Remove unused index | |
input_dataset.reset_index(drop=True, inplace=True) | |
# Force all the feature to be numeric and drop wrong values | |
for column in input_dataset.columns: | |
input_dataset[column] = pd.to_numeric(input_dataset[column], errors='coerce') | |
input_dataset.dropna(inplace=True) | |
# Scale it accordingly with the used scaler during the training phase | |
x = loaded_scaler.transform(input_dataset) | |
# Obtain prediction from model | |
predictions = loaded_model.predict_proba(x) | |
predictions_bankruptcy = predictions[:, 1] | |
# Generate the plot of predictions | |
fig = plt.figure(figsize=(15,7)) | |
plt.hist(predictions_bankruptcy, bins=50) | |
plt.xlabel('Probability of bankruptcy', fontsize=25) | |
plt.ylabel('Number of records', fontsize=25) | |
plt.legend(fontsize=15) | |
plt.tick_params(axis='both', labelsize=25, pad=5) | |
export_predictions_dict = { | |
"Bankruptcy probability 0-10%": float(sum(0.00 <= prediction < 0.10 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 10-20%": float(sum(0.10 <= prediction < 0.20 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 20-30%": float(sum(0.20 <= prediction < 0.30 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 30-40%": float(sum(0.30 <= prediction < 0.40 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 40-50%": float(sum(0.40 <= prediction < 0.50 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 50-60%": float(sum(0.50 <= prediction < 0.60 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 60-70%": float(sum(0.60 <= prediction < 0.70 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 70-80%": float(sum(0.70 <= prediction < 0.80 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 80-90%": float(sum(0.80 <= prediction < 0.90 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)), | |
"Bankruptcy probability 90-100%": float(sum(0.90 <= prediction < 1 for prediction in predictions_bankruptcy) / len(predictions_bankruptcy)) | |
} | |
# Return prediction plot | |
return fig, export_predictions_dict | |
# Single record interface | |
# Build the Gradio interface for the selected mode.
# NOTE(review): this uses the Gradio 2.x component API (gr.inputs/gr.outputs,
# `optional=`, `default=`, `theme="dark"`); those names were removed in
# Gradio 3+, which would make the app fail at startup — pin gradio<3 or
# migrate to gr.Number/gr.Label/gr.Plot/gr.File.
if type_interface == "Single":
    # Single-record mode: one required numeric field per model feature,
    # each defaulting to 0.0.
    gradio_inputs = []
    for feature_name in feature_names:
        gradio_inputs.append(gr.inputs.Number(default=0.0, label=feature_name, optional=False))
    # Outputs: the two class probabilities plus the SHAP force plot.
    gradio_outputs = [
        gr.outputs.Label(num_top_classes=2, label="Prediction probability"),
        gr.outputs.Plot(type="auto", label="SHAP values"),
    ]
    demo = gr.Interface(
        fn=classify_company,
        inputs=gradio_inputs,
        outputs=gradio_outputs,
        theme="dark",
    )
else:
    # Multiple-records mode: upload one Pickle file.
    gradio_description = "Il file in formato Pickle deve contenere tutti i campi previsti dal modello."
    gradio_input = gr.inputs.File(file_count="single", type="file", label="Pickle file", optional=False)
    # Outputs: probability histogram plus the per-band fractions label.
    gradio_output = [gr.outputs.Plot(type="auto", label="Prediction probabilities"), "label"]
    demo = gr.Interface(
        fn=classify_companies,
        inputs=gradio_input,
        outputs=gradio_output,
        description=gradio_description,
        theme="dark",
        live=True,
    )

# Launch the web server; errors surface in the UI instead of failing silently.
demo.launch(show_error=True, inline=False)