"""Streamlit model evaluation views: cross validation, classification report,
confusion matrix, and bad rate analysis for the credit default models."""

from typing import Union

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
)
from xgboost.sklearn import XGBClassifier

from common.data import SplitDataset
from common.util import (
    create_cross_validation_df,
    cross_validation_scores,
    get_df_trueStatus_probabilityDefault_threshStatus_loanAmount,
)
from common.views import (
    cross_validation_graph,
)


def make_evaluation_view(
    model_name_short: str,
    model_name_generic: str,
):
    """Return a Streamlit view that evaluates a fitted model on the test split."""

    def view(
        clf_gbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
        currency: str,
        prob_thresh_selected,
        predicted_default_status,
    ):
        st.header(f"Model Evaluation - {model_name_generic}")

        st.subheader("Cross Validation")
        st.write("Shows how our model will perform as new loans come in.")
        st.write(
            "If the evaluation metric improves on both the train and test sets"
            " as the model trains on each fold, performance should be stable."
        )

        st.write("XGBoost cross validation test:")
        stcol_seed, stcol_eval_metric = st.columns(2)
        with stcol_seed:
            cv_seed = int(
                st.number_input(
                    label="Random State Seed for Cross Validation:",
                    value=123235,
                    key=f"cv_seed_{model_name_short}",
                )
            )
        with stcol_eval_metric:
            eval_metric = st.selectbox(
                label="Select evaluation metric",
                options=[
                    "auc",
                    "aucpr",
                    "rmse",
                    "mae",
                    "logloss",
                    "error",
                    "merror",
                    "mlogloss",
                ],
                key=f"eval_metric_{model_name_short}",
            )

        stcol_trees, stcol_eval_nfold, stcol_earlystoppingrounds = st.columns(3)
        with stcol_trees:
            trees = int(
                st.number_input(
                    label="Number of trees",
                    value=5,
                    key=f"trees_{model_name_short}",
                )
            )
        with stcol_eval_nfold:
            nfolds = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"nfolds_{model_name_short}",
                )
            )
        with stcol_earlystoppingrounds:
            early_stopping_rounds = int(
                st.number_input(
                    label="Early stopping rounds",
                    value=10,
                    key=f"early_stopping_rounds_{model_name_short}",
                )
            )

        # XGBoost-native cross validation on the test split.
        DTrain, cv_df = create_cross_validation_df(
            split_dataset.X_test,
            split_dataset.y_test,
            eval_metric,
            cv_seed,
            trees,
            nfolds,
            early_stopping_rounds,
        )
        st.write(cv_df)

        scoring_options = [
            "roc_auc",
            "accuracy",
            "precision",
            "recall",
            "f1",
            "jaccard",
        ]

        overfit_test = st.radio(
            label="Overfit test:",
            options=("No", "Yes"),
            key=f"overfit_test_{model_name_short}",
        )
        if overfit_test == "Yes":
            st.write("Overfit test:")
            iterations = int(
                st.number_input(
                    label="Number of iterations (boosting rounds)",
                    value=500,
                    key=f"iterations_{model_name_short}",
                )
            )
            DTrain, cv_df_it = create_cross_validation_df(
                split_dataset.X_test,
                split_dataset.y_test,
                eval_metric,
                cv_seed,
                iterations,
                nfolds,
                iterations,
            )
            fig_it = cross_validation_graph(cv_df_it, eval_metric, iterations)
            st.pyplot(fig_it)

        st.write("Sklearn cross validation test:")
        stcol_scoringmetric, st_nfold = st.columns(2)
        with stcol_scoringmetric:
            score_metric = st.selectbox(
                label="Select score",
                options=scoring_options,
                key=f"stcol_scoringmetric_{model_name_short}",
            )
        with st_nfold:
            nfolds_score = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"st_nfold_{model_name_short}",
                )
            )

        cv_scores = cross_validation_scores(
            clf_gbt_model,
            split_dataset.X_test,
            split_dataset.y_test,
            nfolds_score,
            score_metric,
            cv_seed,
        )

        stcol_vals, stcol_mean, st_std = st.columns(3)
        with stcol_vals:
            st.markdown(f"{score_metric} scores:")
            st.write(
                pd.DataFrame(
                    cv_scores,
                    columns=[score_metric],
                )
            )
        with stcol_mean:
            st.metric(
                label=f"Average {score_metric} score",
                value="{:.4f}".format(cv_scores.mean()),
                delta=None,
                delta_color="normal",
            )
        with st_std:
            st.metric(
                label=f"{score_metric} standard deviation (+/-)",
                value="{:.4f}".format(cv_scores.std()),
                delta=None,
                delta_color="normal",
            )

        st.subheader("Classification Report")

        target_names = ["Non-Default", "Default"]
        classification_report_dict = classification_report(
            split_dataset.y_test,
            predicted_default_status,
            target_names=target_names,
            output_dict=True,
        )

        (
            stcol_defaultpres,
            stcol_defaultrecall,
            stcol_defaultf1score,
            stcol_f1score,
        ) = st.columns(4)
        with stcol_defaultpres:
            st.metric(
                label="Default Precision",
                value="{:.0%}".format(
                    classification_report_dict["Default"]["precision"]
                ),
                delta=None,
                delta_color="normal",
            )
        with stcol_defaultrecall:
            st.metric(
                label="Default Recall",
                value="{:.0%}".format(
                    classification_report_dict["Default"]["recall"]
                ),
                delta=None,
                delta_color="normal",
            )
        with stcol_defaultf1score:
            st.metric(
                label="Default F1 Score",
                value="{:.2f}".format(
                    classification_report_dict["Default"]["f1-score"]
                ),
                delta=None,
                delta_color="normal",
            )
        with stcol_f1score:
            st.metric(
                label="Macro avg F1 Score (Model F1 Score)",
                value="{:.2f}".format(
                    classification_report_dict["macro avg"]["f1-score"]
                ),
                delta=None,
                delta_color="normal",
            )

        with st.expander("Classification Report Dictionary:"):
            st.write(classification_report_dict)

        st.markdown(
            f'Default precision: {"{:.0%}".format(classification_report_dict["Default"]["precision"])} of loans predicted as default were actually defaults.'
        )
        st.markdown(
            f'Default recall: {"{:.0%}".format(classification_report_dict["Default"]["recall"])} of true defaults were predicted correctly.'
        )
        f1_gap = 1 - classification_report_dict["Default"]["f1-score"]
        st.markdown(
            f'Default F1 score: {"{:.2f}".format(classification_report_dict["Default"]["f1-score"])} is {"{:.2f}".format(f1_gap)} away from perfect precision and recall (no false positives or false negatives).'
        )
        st.markdown(
            f'Macro avg F1 score: {"{:.2f}".format(classification_report_dict["macro avg"]["f1-score"])} is the overall F1 score of the model.'
        )

        st.subheader("Confusion Matrix")

        confusion_matrix_dict = confusion_matrix(
            split_dataset.y_test, predicted_default_status
        )
        tn, fp, fn, tp = confusion_matrix(
            split_dataset.y_test, predicted_default_status
        ).ravel()

        with st.expander(
            "Confusion matrix (column name = classification model prediction, row name = true status, values = number of loans)"
        ):
            st.write(confusion_matrix_dict)

        st.markdown(
            f'{tp}, {"{:.0%}".format(tp / len(predicted_default_status))} true positives (defaults correctly predicted as defaults).'
        )
        st.markdown(
            f'{fp}, {"{:.0%}".format(fp / len(predicted_default_status))} false positives (non-defaults incorrectly predicted as defaults).'
        )
        st.markdown(
            f'{fn}, {"{:.0%}".format(fn / len(predicted_default_status))} false negatives (defaults incorrectly predicted as non-defaults).'
        )
        st.markdown(
            f'{tn}, {"{:.0%}".format(tn / len(predicted_default_status))} true negatives (non-defaults correctly predicted as non-defaults).'
        )
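
        # Note (illustrative, assuming `predicted_default_status` and the
        # `PREDICT_DEFAULT_STATUS` column used below are derived from the same
        # probability threshold): the bad rate computed in the next section can
        # also be read off this confusion matrix as the share of defaults among
        # the loans the model accepts, i.e.
        #
        #     bad_rate ≈ fn / (tn + fn)
        #
        # since accepted loans are those predicted as non-default (tn + fn) and
        # the defaults among them are the false negatives (fn).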

        st.subheader("Bad Rate")

        df_trueStatus_probabilityDefault_threshStatus_loanAmount = (
            get_df_trueStatus_probabilityDefault_threshStatus_loanAmount(
                clf_gbt_model,
                split_dataset.X_test,
                split_dataset.y_test,
                prob_thresh_selected,
                "loan_amnt",
            )
        )

        with st.expander(
            "Loan Status, Probability of Default, & Loan Amount DataFrame"
        ):
            st.write(df_trueStatus_probabilityDefault_threshStatus_loanAmount)

        # Loans the model accepts are those predicted as non-default (0).
        accepted_loans = df_trueStatus_probabilityDefault_threshStatus_loanAmount[
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "PREDICT_DEFAULT_STATUS"
            ]
            == 0
        ]
        # Share of accepted loans that actually defaulted.
        bad_rate = (
            np.sum(accepted_loans["loan_status"])
            / accepted_loans["loan_status"].count()
        )

        with st.expander("Loan Amount Summary Statistics"):
            st.write(
                df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                    "loan_amnt"
                ].describe()
            )

        avg_loan = np.mean(
            df_trueStatus_probabilityDefault_threshStatus_loanAmount["loan_amnt"]
        )

        # Cross-tabulate true status (rows) against predicted status (columns),
        # then scale each count by the average loan amount to estimate the
        # monetary value in each cell.
        crosstab_df = pd.crosstab(
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "loan_status"
            ],  # row label
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "PREDICT_DEFAULT_STATUS"
            ],  # column label
        ).apply(lambda x: x * avg_loan, axis=0)

        with st.expander(
            "Cross tabulation (column name = classification model prediction, row name = true status, values = number of loans * average loan value)"
        ):
            st.write(crosstab_df)

        st.write(
            f'Bad rate: {"{:.2%}".format(bad_rate)} of all the loans the model accepted (classified as non-default) from the test set were actually defaults.'
        )
        st.write(
            f'Estimated value of the bad rate is {currency} {"{:,.2f}".format(crosstab_df[0][1])}.'
        )
        st.write(
            f'Total estimated value of the loans the model accepted (predicted as non-default) is {currency} {"{:,.2f}".format(crosstab_df[0][0] + crosstab_df[0][1])}.'
        )
        st.write(
            f'Estimated value of loans incorrectly predicted as default is {currency} {"{:,.2f}".format(crosstab_df[1][0])}.'
        )
        st.write(
            f'Estimated value of loans correctly predicted as defaults is {currency} {"{:,.2f}".format(crosstab_df[1][1])}.'
        )

        return df_trueStatus_probabilityDefault_threshStatus_loanAmount

    return view


decision_tree_evaluation_view = make_evaluation_view("gbt", "Decision Tree")
logistic_evaluation_view = make_evaluation_view("lg", "Logistic Regression")
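
# Example usage (illustrative sketch, not part of the original wiring): the view
# factories above are expected to be called from a Streamlit page once a model
# has been fit and a probability threshold selected. The names
# `trained_gbt_model`, `dataset`, `threshold`, and `predictions` below are
# hypothetical placeholders.
#
# results_df = decision_tree_evaluation_view(
#     clf_gbt_model=trained_gbt_model,
#     split_dataset=dataset,
#     currency="USD",
#     prob_thresh_selected=threshold,
#     predicted_default_status=predictions,
# )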