from typing import Union

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from xgboost.sklearn import XGBClassifier

from common.data import SplitDataset
from common.util import (
    create_cross_validation_df,
    cross_validation_scores,
    get_df_trueStatus_probabilityDefault_threshStatus_loanAmount,
)
from common.views import cross_validation_graph
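

# Factory that builds a Streamlit evaluation view bound to one model's short
# and display names, so the same layout can be reused for both models
# instantiated at the bottom of this file.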
def make_evaluation_view(
    model_name_short: str,
    model_name_generic: str,
):
    def view(
        clf_gbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
        currency: str,
        prob_thresh_selected,
        predicted_default_status,
    ):
        st.header(f"Model Evaluation - {model_name_generic}")

        st.subheader("Cross Validation")
        st.write("Shows how our model will perform as new loans come in.")
        st.write(
            "If the evaluation metric for the test and train sets improves as "
            "the model trains on each fold, performance should be stable."
        )

        st.write("XGBoost cross validation test:")
        stcol_seed, stcol_eval_metric = st.columns(2)
        with stcol_seed:
            cv_seed = int(
                st.number_input(
                    label="Random State Seed for Cross Validation:",
                    value=123235,
                    key=f"cv_seed_{model_name_short}",
                )
            )
        with stcol_eval_metric:
            eval_metric = st.selectbox(
                label="Select evaluation metric",
                options=[
                    "auc",
                    "aucpr",
                    "rmse",
                    "mae",
                    "logloss",
                    "error",
                    "merror",
                    "mlogloss",
                ],
                key=f"eval_metric_{model_name_short}",
            )
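
        # Core cross-validation parameters: number of boosting rounds (trees),
        # fold count, and early-stopping patience.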
        stcol_trees, stcol_eval_nfold, stcol_earlystoppingrounds = st.columns(3)
        with stcol_trees:
            trees = int(
                st.number_input(
                    label="Number of trees",
                    value=5,
                    key=f"trees_{model_name_short}",
                )
            )
        with stcol_eval_nfold:
            nfolds = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"nfolds_{model_name_short}",
                )
            )
        with stcol_earlystoppingrounds:
            early_stopping_rounds = int(
                st.number_input(
                    label="Early stopping rounds",
                    value=10,
                    key=f"early_stopping_rounds_{model_name_short}",
                )
            )
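
        # Run cross validation on the held-out test split and display the
        # per-fold evaluation results.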
        DTrain, cv_df = create_cross_validation_df(
            split_dataset.X_test,
            split_dataset.y_test,
            eval_metric,
            cv_seed,
            trees,
            nfolds,
            early_stopping_rounds,
        )
        st.write(cv_df)
        scoring_options = [
            "roc_auc",
            "accuracy",
            "precision",
            "recall",
            "f1",
            "jaccard",
        ]
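
        # Optional overfit check: repeat cross validation with a much larger
        # iteration budget and plot the train/test metric curves per fold.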
        overfit_test = st.radio(
            label="Overfit test:",
            options=("No", "Yes"),
            key=f"overfit_test_{model_name_short}",
        )
        if overfit_test == "Yes":
            st.write("Overfit test:")
            iterations = int(
                st.number_input(
                    label="Number of iterations",
                    value=500,
                    key=f"iterations_{model_name_short}",
                )
            )
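            # Early-stopping patience is set equal to the iteration count
            # here, so early stopping should not cut the run short and the
            # curves cover every iteration.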
            DTrain, cv_df_it = create_cross_validation_df(
                split_dataset.X_test,
                split_dataset.y_test,
                eval_metric,
                cv_seed,
                iterations,
                nfolds,
                iterations,
            )
            fig_it = cross_validation_graph(cv_df_it, eval_metric, iterations)
            st.pyplot(fig_it)
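
        # Scikit-learn cross validation on the fitted classifier: pick a
        # scoring metric and fold count, then report the per-fold scores.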
| st.write("Sklearn cross validation test:") | |
| stcol_scoringmetric, st_nfold = st.columns(2) | |
| with stcol_scoringmetric: | |
| score_metric = st.selectbox( | |
| label="Select score", | |
| options=scoring_options, | |
| key=f"stcol_scoringmetric_{model_name_short}", | |
| ) | |
| with st_nfold: | |
| nfolds_score = int( | |
| st.number_input( | |
| label="Number of folds", | |
| value=5, | |
| key=f"st_nfold_{model_name_short}", | |
| ) | |
| ) | |
| cv_scores = cross_validation_scores( | |
| clf_gbt_model, | |
| split_dataset.X_test, | |
| split_dataset.y_test, | |
| nfolds_score, | |
| score_metric, | |
| cv_seed, | |
| ) | |
        stcol_vals, stcol_mean, st_std = st.columns(3)
        with stcol_vals:
            st.markdown(f"{score_metric} scores:")
            st.write(
                pd.DataFrame(
                    cv_scores,
                    columns=[score_metric],
                )
            )
        with stcol_mean:
            st.metric(
                label=f"Average {score_metric} score",
                value=f"{cv_scores.mean():.4f}",
                delta=None,
                delta_color="normal",
            )
        with st_std:
            st.metric(
                label=f"{score_metric} standard deviation (+/-)",
                value=f"{cv_scores.std():.4f}",
                delta=None,
                delta_color="normal",
            )
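
        # Classification report at the selected probability threshold, with
        # headline metrics for the Default class and the macro-average F1.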
| st.subheader("Classification Report") | |
| target_names = ["Non-Default", "Default"] | |
| classification_report_dict = classification_report( | |
| split_dataset.y_test, | |
| predicted_default_status, | |
| target_names=target_names, | |
| output_dict=True, | |
| ) | |
| ( | |
| stcol_defaultpres, | |
| stcol_defaultrecall, | |
| stcol_defaultf1score, | |
| stcol_f1score, | |
| ) = st.columns(4) | |
        with stcol_defaultpres:
            st.metric(
                label="Default Precision",
                value=f"{classification_report_dict['Default']['precision']:.0%}",
                delta=None,
                delta_color="normal",
            )
        with stcol_defaultrecall:
            st.metric(
                label="Default Recall",
                value=f"{classification_report_dict['Default']['recall']:.0%}",
                delta=None,
                delta_color="normal",
            )
        with stcol_defaultf1score:
            st.metric(
                label="Default F1 Score",
                value=f"{classification_report_dict['Default']['f1-score']:.2f}",
                delta=None,
                delta_color="normal",
            )
        with stcol_f1score:
            st.metric(
                label="Macro avg F1 Score (Model F1 Score)",
                value=f"{classification_report_dict['macro avg']['f1-score']:.2f}",
                delta=None,
                delta_color="normal",
            )
| with st.expander("Classification Report Dictionary:"): | |
| st.write(classification_report_dict) | |
| st.markdown( | |
| f'Default precision: {"{:.0%}".format(classification_report_dict["Default"]["precision"])} of loans predicted as default were actually default.' | |
| ) | |
| st.markdown( | |
| f'Default recall: {"{:.0%}".format(classification_report_dict["Default"]["recall"])} of true defaults predicted correctly.' | |
| ) | |
| f1_gap = 1 - classification_report_dict["Default"]["f1-score"] | |
| st.markdown( | |
| f'Default F1 score: {"{:.2f}".format(classification_report_dict["Default"]["f1-score"])}\ | |
| is {"{:.2f}".format(f1_gap)} away from perfect precision and recall (no false positive rate).' | |
| ) | |
| st.markdown( | |
| f'macro avg F1 score: {"{:.2f}".format(classification_report_dict["macro avg"]["f1-score"])} is the models F1 score.' | |
| ) | |
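
        # Confusion matrix on the test set; tn/fp/fn/tp are unpacked for the
        # plain-language summaries below.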
| st.subheader("Confusion Matrix") | |
| confuctiomatrix_dict = confusion_matrix( | |
| split_dataset.y_test, predicted_default_status | |
| ) | |
| tn, fp, fn, tp = confusion_matrix( | |
| split_dataset.y_test, predicted_default_status | |
| ).ravel() | |
| with st.expander( | |
| "Confusion matrix (column name = classification model prediction, row name = true status, values = number of loans" | |
| ): | |
| st.write(confuctiomatrix_dict) | |
        total = len(predicted_default_status)
        st.markdown(
            f"{tp} ({tp / total:.0%}) true positives "
            "(defaults correctly predicted as defaults)."
        )
        st.markdown(
            f"{fp} ({fp / total:.0%}) false positives "
            "(non-defaults incorrectly predicted as defaults)."
        )
        st.markdown(
            f"{fn} ({fn / total:.0%}) false negatives "
            "(defaults incorrectly predicted as non-defaults)."
        )
        st.markdown(
            f"{tn} ({tn / total:.0%}) true negatives "
            "(non-defaults correctly predicted as non-defaults)."
        )
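
        # Bad rate: of the loans the model would accept (predicted
        # non-default), the fraction that actually defaulted.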
| st.subheader("Bad Rate") | |
| df_trueStatus_probabilityDefault_threshStatus_loanAmount = ( | |
| get_df_trueStatus_probabilityDefault_threshStatus_loanAmount( | |
| clf_gbt_model, | |
| split_dataset.X_test, | |
| split_dataset.y_test, | |
| prob_thresh_selected, | |
| "loan_amnt", | |
| ) | |
| ) | |
| with st.expander( | |
| "Loan Status, Probability of Default, & Loan Amount DataFrame" | |
| ): | |
| st.write(df_trueStatus_probabilityDefault_threshStatus_loanAmount) | |
| accepted_loans = ( | |
| df_trueStatus_probabilityDefault_threshStatus_loanAmount[ | |
| df_trueStatus_probabilityDefault_threshStatus_loanAmount[ | |
| "PREDICT_DEFAULT_STATUS" | |
| ] | |
| == 0 | |
| ] | |
| ) | |
| bad_rate = ( | |
| np.sum(accepted_loans["loan_status"]) | |
| / accepted_loans["loan_status"].count() | |
| ) | |
| with st.expander("Loan Amount Summary Statistics"): | |
| st.write( | |
| df_trueStatus_probabilityDefault_threshStatus_loanAmount[ | |
| "loan_amnt" | |
| ].describe() | |
| ) | |
| avg_loan = np.mean( | |
| df_trueStatus_probabilityDefault_threshStatus_loanAmount[ | |
| "loan_amnt" | |
| ] | |
| ) | |
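
        # Cross tabulation of true status (rows) against predicted status
        # (columns), scaled by the average loan amount to estimate the money
        # in each cell.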
        crosstab_df = pd.crosstab(
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "loan_status"
            ],  # rows: true status
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "PREDICT_DEFAULT_STATUS"
            ],  # columns: model prediction
        ).apply(lambda x: x * avg_loan, axis=0)
        with st.expander(
            "Cross tabulation (column name = classification model prediction, "
            "row name = true status, values = number of loans * average loan "
            "value)"
        ):
            st.write(crosstab_df)
        st.write(
            f"Bad rate: {bad_rate:.2%} of all the loans the model accepted "
            "(classified as non-default) from the test set were actually "
            "defaults."
        )
        st.write(
            "Estimated value of the bad rate is "
            f"{currency} {crosstab_df.loc[1, 0]:,.2f}."
        )
        st.write(
            "Total estimated value of the loans the model accepted "
            f"(predicted non-default) is {currency} "
            f"{crosstab_df.loc[0, 0] + crosstab_df.loc[1, 0]:,.2f}."
        )
        st.write(
            "Estimated value of loans incorrectly predicted as default is "
            f"{currency} {crosstab_df.loc[0, 1]:,.2f}."
        )
        st.write(
            "Estimated value of loans correctly predicted as default is "
            f"{currency} {crosstab_df.loc[1, 1]:,.2f}."
        )
        return df_trueStatus_probabilityDefault_threshStatus_loanAmount

    return view


decision_tree_evaluation_view = make_evaluation_view("gbt", "Decision Tree")
logistic_evaluation_view = make_evaluation_view("lg", "Logistic Regression")
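

# A minimal usage sketch (hypothetical names; the real wiring lives in the
# app's page code). Each factory-built view is called with a fitted model,
# the shared train/test split, and the threshold chosen elsewhere in the UI:
#
#     df_out = decision_tree_evaluation_view(
#         clf_gbt_model=fitted_gbt,
#         split_dataset=split_dataset,
#         currency="USD",
#         prob_thresh_selected=0.5,
#         predicted_default_status=predicted_default_status,
#     )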