# Gradio web app for company-bankruptcy prediction (Hugging Face Space).
# NOTE: page-scrape residue removed here ("Runtime error" banner, commit
# hashes, file size, and the line-number gutter were not part of the code).
import gradio as gr
import pandas as pd
import joblib
import numpy as np
import shap
import matplotlib.pyplot as plt
# Path of the serialized scaler to be tested; change to evaluate another scaler.
PATH_SCALER: str = "standard_scaler_1.sav"
# Path of the serialized model to be tested; change to evaluate another model.
PATH_MODEL: str = "random_forest_1.sav"
# Select the type of interface, "Single" for manually inserting a record,
# "Multiple" for automatically inserting multiple records from a pickle file.
type_interface: str = "Single"
# Load the scaler and model once at startup (joblib deserializes the .sav files).
loaded_scaler = joblib.load(PATH_SCALER)
loaded_model = joblib.load(PATH_MODEL)
# Feature names stored on the model object at training time — presumably a
# list of column-name strings in training order; TODO confirm against trainer.
feature_names = loaded_model.feature_names
# Define what to do with data given from the user form
def classify_company(*feature_values):
    """Classify a single company record entered through the web form.

    Args:
        feature_values: one numeric value per model feature, in the order
            of ``loaded_model.feature_names``.

    Returns:
        tuple: ``(prediction_dict, fig)`` where ``prediction_dict`` maps the
        class names to their probabilities and ``fig`` is a matplotlib
        Figure containing the SHAP force plot for this record.
    """
    # Build a one-row dataframe so column names line up with the model's features.
    x = pd.DataFrame([feature_values], columns=loaded_model.feature_names, dtype=float)
    # Scale with the same scaler used during the training phase.
    scaled_x = loaded_scaler.transform(x)
    # Obtain the class-probability vector for the single record.
    prediction = loaded_model.predict_proba(scaled_x)[0]
    # NOTE(review): assumes class index 0 == "Active" and 1 == "Bankruptcy";
    # confirm against loaded_model.classes_.
    prediction_dict = {"Active": float(prediction[0]), "Bankruptcy": float(prediction[1])}
    # Building a TreeExplainer is expensive and depends only on the fixed
    # module-level model, so create it once and reuse it across requests.
    if not hasattr(classify_company, "_explainer"):
        classify_company._explainer = shap.TreeExplainer(loaded_model)
    explainer = classify_company._explainer
    shap_values = explainer.shap_values(scaled_x)
    # Render the force plot into the current pyplot figure (show=False so we
    # can capture it instead of displaying it).
    shap.force_plot(
        base_value=explainer.expected_value[0],
        shap_values=shap_values[0],
        features=x,
        feature_names=loaded_model.feature_names,
        out_names=["Active", "Bankruptcy"],
        matplotlib=True,
        figsize=(30, 7),
        show=False,
    )
    plt.tight_layout()
    fig = plt.gcf()
    # Detach the figure from pyplot's global state; the Figure object itself
    # is still valid and is returned for Gradio to render.
    plt.close()
    return prediction_dict, fig
# Define what to do with data given from the Pickle file
def classify_companies(file):
    """Classify every record of an uploaded Pickle file.

    Args:
        file: uploaded file object (Gradio File); ``file.name`` is the path
            of a pickled pandas DataFrame containing all model features.

    Returns:
        tuple: ``(fig, export_predictions_dict)`` — a histogram Figure of the
        bankruptcy probabilities and a dict mapping 10%-wide probability
        buckets to the fraction of records falling in each.

    Raises:
        ValueError: if no valid numeric record remains after cleaning.
    """
    # Read file as Pickle and keep only the columns the model expects.
    input_dataset = pd.read_pickle(file.name)
    input_dataset = input_dataset[feature_names]
    # Remove unused index.
    input_dataset.reset_index(drop=True, inplace=True)
    # Force all features to numeric and drop rows with invalid values.
    for column in input_dataset.columns:
        input_dataset[column] = pd.to_numeric(input_dataset[column], errors='coerce')
    input_dataset.dropna(inplace=True)
    # Fix: guard against an empty dataset (the original divided by zero below).
    if input_dataset.empty:
        raise ValueError("No valid numeric records found in the uploaded file.")
    # Scale with the same scaler used during the training phase.
    x = loaded_scaler.transform(input_dataset)
    # Probability of the "Bankruptcy" class (column 1) for every record.
    predictions_bankruptcy = loaded_model.predict_proba(x)[:, 1]
    # Histogram of the predicted probabilities.
    fig = plt.figure(figsize=(15, 7))
    plt.hist(predictions_bankruptcy, bins=50)
    plt.xlabel('Probability of bankruptcy', fontsize=25)
    plt.ylabel('Number of records', fontsize=25)
    # Fix: removed plt.legend() — there were no labeled artists, so it only
    # emitted a "no artists with labels" warning and drew nothing.
    plt.tick_params(axis='both', labelsize=25, pad=5)
    # Fraction of records per 10%-wide bucket, computed in a single pass.
    # Fix: np.histogram's closed last edge also counts probabilities equal to
    # exactly 1.0, which the original half-open comparisons silently dropped.
    counts, _ = np.histogram(predictions_bankruptcy, bins=np.linspace(0.0, 1.0, 11))
    total = len(predictions_bankruptcy)
    export_predictions_dict = {
        f"Bankruptcy probability {10 * i}-{10 * (i + 1)}%": float(counts[i] / total)
        for i in range(10)
    }
    # Return prediction plot and bucket proportions.
    return fig, export_predictions_dict
# Single record interface
if type_interface == "Single":
    # One optional numeric input field per model feature.
    gradio_inputs = [
        gr.inputs.Number(default=0.0, label=feature_name, optional=False)
        for feature_name in feature_names
    ]
    # Outputs: class probabilities plus the per-record SHAP force plot.
    gradio_outputs = [
        gr.outputs.Label(num_top_classes=2, label="Prediction probability"),
        gr.outputs.Plot(type="auto", label="SHAP values"),
    ]
    # Create the web app interface.
    demo = gr.Interface(
        fn=classify_company,
        inputs=gradio_inputs,
        outputs=gradio_outputs,
        theme="dark",
    )
# Multiple records interface
else:
    # User-facing description (kept in Italian, as in the original app).
    gradio_description = "Il file in formato Pickle deve contenere tutti i campi previsti dal modello."
    gradio_input = gr.inputs.File(file_count="single", type="file", label="Pickle file", optional=False)
    # Outputs: histogram of probabilities plus the bucket-proportion label.
    gradio_output = [gr.outputs.Plot(type="auto", label="Prediction probabilities"), "label"]
    # Create the web app interface.
    demo = gr.Interface(
        fn=classify_companies,
        inputs=gradio_input,
        outputs=gradio_output,
        description=gradio_description,
        theme="dark",
        live=True,
    )
# Fix: the original launch line ended with a stray "|" (scrape artifact)
# that made the file a syntax error.
demo.launch(show_error=True, inline=False)