import gradio as gr
import pandas as pd
import joblib
import numpy as np
import shap
import matplotlib.pyplot as plt

# Path of the scaler to be tested; can be changed
PATH_SCALER = "standard_scaler_1.sav"
# Path of the model to be tested; can be changed
PATH_MODEL = "random_forest_1.sav"
# Type of interface: "Single" to insert one record manually,
# "Multiple" to load multiple records from a Pickle file
type_interface = "Single"

loaded_scaler = joblib.load(PATH_SCALER)
loaded_model = joblib.load(PATH_MODEL)
# The feature names are assumed to have been attached to the model
# object before it was saved
feature_names = loaded_model.feature_names


# Handle the data coming from the single-record form
def classify_company(*feature_values):
    # Build a one-row DataFrame from the form values
    x = pd.DataFrame([feature_values], columns=feature_names, dtype=float)
    # Scale it with the same scaler used during the training phase
    scaled_x = loaded_scaler.transform(x)
    # Obtain the class probabilities from the model
    prediction = loaded_model.predict_proba(scaled_x)[0]
    prediction_dict = {"Active": float(prediction[0]),
                       "Bankruptcy": float(prediction[1])}
    # Create the SHAP explainer and compute the SHAP values
    explainer = shap.TreeExplainer(loaded_model)
    shap_values = explainer.shap_values(scaled_x)
    # Draw the SHAP force plot for the record
    shap.force_plot(
        base_value=explainer.expected_value[0],
        shap_values=shap_values[0],
        features=x,
        feature_names=feature_names,
        out_names=["Active", "Bankruptcy"],
        matplotlib=True,
        figsize=(30, 7),
        show=False,
    )
    # Grab the figure produced by SHAP
    plt.tight_layout()
    fig = plt.gcf()
    plt.close()
    # Return the prediction probabilities and the SHAP plot
    return prediction_dict, fig


# Handle the data coming from the Pickle file
def classify_companies(file):
    # Read the uploaded file as a Pickle
    input_dataset = pd.read_pickle(file.name)
    input_dataset = input_dataset[feature_names]
    # Remove the unused index
    input_dataset.reset_index(drop=True, inplace=True)
    # Force every feature to be numeric and drop invalid values
    for column in input_dataset.columns:
        input_dataset[column] = pd.to_numeric(input_dataset[column],
                                              errors='coerce')
    input_dataset.dropna(inplace=True)
    # Scale the data with the same scaler used during the training phase
    x = loaded_scaler.transform(input_dataset)
    # Obtain the class probabilities from the model
    predictions = loaded_model.predict_proba(x)
    predictions_bankruptcy = predictions[:, 1]
    # Plot the distribution of the predicted probabilities
    fig = plt.figure(figsize=(15, 7))
    plt.hist(predictions_bankruptcy, bins=50)
    plt.xlabel('Probability of bankruptcy', fontsize=25)
    plt.ylabel('Number of records', fontsize=25)
    plt.tick_params(axis='both', labelsize=25, pad=5)
    # Fraction of records in each 10% probability bucket; the last bucket is
    # closed on the right so a probability of exactly 1.0 is not dropped
    n = len(predictions_bankruptcy)
    export_predictions_dict = {}
    for low in range(0, 100, 10):
        lo, hi = low / 100, (low + 10) / 100
        count = sum((lo <= p < hi) or (hi == 1.0 and p == 1.0)
                    for p in predictions_bankruptcy)
        export_predictions_dict[f"Bankruptcy probability {low}-{low + 10}%"] = \
            float(count / n)
    # Return the prediction plot and the bucketed predictions
    return fig, export_predictions_dict
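
# --- Illustrative helper (an assumption, not part of the original app) ---
# A minimal sketch of how an input file for the "Multiple" interface below
# could be produced: a DataFrame with one column per model feature, saved via
# to_pickle. The helper name, the random values, and the default file name
# are placeholders only.
def make_example_pickle(path="example_companies.pkl", n_rows=100):
    example = pd.DataFrame(np.random.rand(n_rows, len(feature_names)),
                           columns=feature_names)
    example.to_pickle(path)
    return path
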
# Single-record interface
if type_interface == "Single":
    # Input components: one numeric field per feature
    gradio_inputs = []
    for feature_name in feature_names:
        gradio_inputs.append(gr.inputs.Number(default=0.0, label=feature_name,
                                              optional=False))
    # Output components
    gradio_outputs = [gr.outputs.Label(num_top_classes=2,
                                       label="Prediction probability"),
                      gr.outputs.Plot(type="auto", label="SHAP values")]
    # Create the web app interface
    demo = gr.Interface(
        fn=classify_company,
        inputs=gradio_inputs,
        outputs=gradio_outputs,
        theme="dark"
    )
# Multiple-records interface
else:
    # Input file component
    gradio_description = ("The Pickle file must contain all the fields "
                          "expected by the model.")
    gradio_input = gr.inputs.File(file_count="single", type="file",
                                  label="Pickle file", optional=False)
    # Output components
    gradio_output = [gr.outputs.Plot(type="auto",
                                     label="Prediction probabilities"),
                     "label"]
    # Create the web app interface
    demo = gr.Interface(
        fn=classify_companies,
        inputs=gradio_input,
        outputs=gradio_output,
        description=gradio_description,
        theme="dark",
        live=True
    )

demo.launch(show_error=True, inline=False)
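
# Usage note (a sketch of a typical local run; the script name is an
# assumption): `python app.py` starts the Gradio server. show_error=True
# surfaces Python exceptions in the browser instead of failing silently, and
# inline=False avoids notebook-style inline rendering. Passing share=True to
# launch() would additionally expose a temporary public URL through Gradio's
# tunneling service.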