# Mahesh Babu
# added the UI
# 210b96e
#Importing the necessary libraries
import pandas as pd
import torch
from streamlit_option_menu import option_menu
from plotting_helpers import (plot_top_5_products, plot_top_5_issues, plot_top_5_issues_in_product, plot_top_10_companies_complaints,
plot_top_10_states_most_complaints, plot_top_10_states_least_complaints, complaints_by_year,
complaints_across_states)
from transformers import pipeline
import streamlit as st
import pickle
import warnings
# Suppress every warning category for the lifetime of the script run.
warnings.filterwarnings("ignore")

# Setting page config: browser-tab title/icon, wide layout, sidebar open on load.
st.set_page_config(
    layout="wide",
    initial_sidebar_state='expanded',
    page_title='CFPB Consumer Complaint Insights',
    page_icon='📋',
)
@st.cache_data(show_spinner=False)
def load_process_data():
    """Read ``complaints.csv`` and return the cleaned complaints DataFrame.

    Steps, in order:
      1. parse ``Date received`` as datetimes;
      2. keep only the columns used by the dashboard and drop rows with any
         missing value in them;
      3. collapse the raw ``Product`` values into five dashboard categories
         (values absent from the mapping become NaN, as ``Series.map`` does,
         and are not dropped afterwards);
      4. add a ``complaint length`` column (character count of the narrative);
      5. keep only narratives longer than 20 characters that are not one of
         the known boilerplate texts.

    Returns:
        pandas.DataFrame: the filtered frame, including ``complaint length``.
    """
    raw = pd.read_csv('complaints.csv')
    raw['Date received'] = pd.to_datetime(raw['Date received'])

    kept_columns = [
        'Product', 'Sub-product', 'Issue', 'Sub-issue',
        'Consumer complaint narrative', 'Company public response', 'Company',
        'State', 'ZIP code', 'Date received',
    ]
    complaints = raw[kept_columns].dropna()

    # Dashboard category -> raw CFPB product names folded into it.
    raw_products_by_category = {
        'Credit Reporting': [
            'Credit reporting or other personal consumer reports',
            'Credit reporting, credit repair services, or other personal consumer reports',
        ],
        'Loans / Mortgage': [
            'Payday loan, title loan, personal loan, or advance loan',
            'Payday loan, title loan, or personal loan',
            'Student loan',
            'Vehicle loan or lease',
            'Mortgage',
        ],
        'Debt collection': ['Debt collection'],
        'Credit/Prepaid Card': [
            'Credit card or prepaid card',
            'Credit card',
            'Prepaid card',
        ],
        'Checking or savings account': ['Checking or savings account'],
    }
    product_map = {
        raw_name: category
        for category, raw_names in raw_products_by_category.items()
        for raw_name in raw_names
    }
    complaints.loc[:, 'Product'] = complaints['Product'].map(product_map)

    complaints['complaint length'] = complaints['Consumer complaint narrative'].apply(len)

    # Narratives that carry no usable text for classification.
    boilerplate_narratives = [
        'See document attached',
        'See the attached documents.',
        'Incorrect information on my credit report',
        'incorrect information on my credit report',
        'please see attached file',
        'Please see documents Attached',
        'Incorrect information on my credit report.',
        'Please see attached file',
        'see attached',
        'See attached',
        'SEE ATTACHED DOCUMENTS',
        'See Attached',
        'SEE ATTACHMENT',
        'SEE ATTACHMENTS',
        'XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX',
    ]
    is_long_enough = complaints['complaint length'] > 20
    is_boilerplate = complaints['Consumer complaint narrative'].isin(boilerplate_narratives)
    return complaints[is_long_enough & ~is_boilerplate]
# Load the processed data
df = load_process_data()

# Device for the transformer pipelines: Apple "mps" when available, otherwise CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"


@st.cache_resource(show_spinner=False)
def _load_product_classifier(device):
    """Build the product text-classification pipeline once per server process.

    Streamlit re-executes this script on every interaction; wrapping the
    construction in ``st.cache_resource`` keeps the model from being reloaded
    on each rerun.

    Args:
        device: device string handed to ``transformers.pipeline``.

    Returns:
        The text-classification pipeline for the product model.
    """
    return pipeline("text-classification", model="Mahesh9/distil-bert-fintuned-product-cfpb-complaints",
                    max_length=512, truncation=True, device=device)


@st.cache_resource(show_spinner=False)
def _load_subproduct_models():
    """Unpickle the five sub-product models once and return them keyed by short name.

    NOTE(review): ``pickle.load`` executes code embedded in the file; these
    paths must only ever point at trusted, project-produced artifacts.
    """
    model_paths = {
        'cr': 'subproduct_prediction/models/Credit_Reporting_model.pkl',
        'cp': 'subproduct_prediction/models/Credit_Prepaid_Card_model.pkl',
        'cs': 'subproduct_prediction/models/Checking_saving_model.pkl',
        'l': 'subproduct_prediction/models/loan_model.pkl',
        'd': 'subproduct_prediction/models/Debt_model.pkl',
    }
    loaded = {}
    for short_name, path in model_paths.items():
        with open(path, 'rb') as fh:
            loaded[short_name] = pickle.load(fh)
    return loaded


# Initialize the pipeline for classifying product
product_classifier = _load_product_classifier(device)

# Load sub-product classifier models (module-level names kept for select_subproduct_model)
_subproduct_models = _load_subproduct_models()
trained_model_cr = _subproduct_models['cr']
trained_model_cp = _subproduct_models['cp']
trained_model_cs = _subproduct_models['cs']
trained_model_l = _subproduct_models['l']
trained_model_d = _subproduct_models['d']
# Define a function to select the appropriate subproduct prediction model based on the predicted product
@st.cache_resource(show_spinner=False)
def select_subproduct_model(predicted_product):
    """Return the sub-product model that belongs to ``predicted_product``.

    Args:
        predicted_product: one of the five dashboard product categories.

    Returns:
        The module-level ``trained_model_*`` object for that category.

    Raises:
        ValueError: if ``predicted_product`` is not a known category.
    """
    # (product category, model loaded for it), checked in order.
    models_by_product = (
        ('Credit Reporting', trained_model_cr),
        ('Credit/Prepaid Card', trained_model_cp),
        ('Checking or savings account', trained_model_cs),
        ('Loans / Mortgage', trained_model_l),
        ('Debt collection', trained_model_d),
    )
    for product_name, model in models_by_product:
        if predicted_product == product_name:
            return model
    raise ValueError("Invalid predicted product category")
@st.cache_resource(show_spinner=False)
def _load_issue_classifier(device):
    """Build the issue text-classification pipeline once per server process.

    Streamlit re-executes this script on every interaction; caching the
    pipeline as a resource avoids reloading the model on each rerun.

    Args:
        device: device string handed to ``transformers.pipeline``.

    Returns:
        The text-classification pipeline for the issue model.
    """
    return pipeline("text-classification", model="Mahesh9/distil-bert-fintuned-issues-cfpb-complaints",
                    max_length=512, truncation=True, device=device)


# Loading the issue classifier model
issue_classifier = _load_issue_classifier(device)

# Path to the models and their corresponding names
issue_model_files = {
    'trained_model_account_operations': 'subproduct_prediction/issue_models/account_operations_and_unauthorized_transaction_issues.pkl',
    'trained_model_collect_debt': 'subproduct_prediction/issue_models/attempts_to_collect_debt_not_owed.pkl',
    'trained_model_closing_account': 'subproduct_prediction/issue_models/closing_an_account.pkl',
    'trained_model_closing_your_account': 'subproduct_prediction/issue_models/closing_your_account.pkl',
    'trained_model_credit_report': 'subproduct_prediction/issue_models/credit_report_and_monitoring_issues.pkl',
    'trained_model_lender': 'subproduct_prediction/issue_models/dealing_with_your_lender_or_servicer.pkl',
    'trained_model_disputes': 'subproduct_prediction/issue_models/disputes_and_misrepresentations.pkl',
    'trained_model_improper_use_report': 'subproduct_prediction/issue_models/improper_use_of_your_report.pkl',
    'trained_model_incorrect_info': 'subproduct_prediction/issue_models/incorrect_information_on_your_report.pkl',
    'trained_model_legal_and_threat': 'subproduct_prediction/issue_models/legal_and_threat_actions.pkl',
    'trained_model_managing_account': 'subproduct_prediction/issue_models/managing_an_account.pkl',
    'trained_model_payment_funds': 'subproduct_prediction/issue_models/payment_and_funds_management.pkl',
    'trained_model_investigation_wrt_issue': 'subproduct_prediction/issue_models/problem_with_a_company\'s_investigation_into_an_existing_issue.pkl',
    'trained_model_investigation_wrt_problem': 'subproduct_prediction/issue_models/problem_with_a_company\'s_investigation_into_an_existing_problem.pkl',
    'trained_model_credit_investigation_wrt_problem': 'subproduct_prediction/issue_models/problem_with_a_credit_reporting_company\'s_investigation_into_an_existing_problem.pkl',
    'trained_model_purchase_shown': 'subproduct_prediction/issue_models/problem_with_a_purchase_shown_on_your_statement.pkl',
    'trained_model_notification_about_debt': 'subproduct_prediction/issue_models/written_notification_about_debt.pkl',
}


@st.cache_resource(show_spinner=False)
def _load_issue_models():
    """Unpickle every file in ``issue_model_files`` once; return ``{model name: model}``.

    NOTE(review): ``pickle.load`` executes code embedded in the file; these
    paths must only ever point at trusted, project-produced artifacts.
    """
    loaded = {}
    for model_name, file_path in issue_model_files.items():
        with open(file_path, 'rb') as fh:
            loaded[model_name] = pickle.load(fh)
    return loaded


# Sub-issue models keyed by the names used in select_subissue_model
issue_models = _load_issue_models()
# Define a function to select the appropriate subissue prediction model based on the predicted issue
def select_subissue_model(predicted_issue):
    """Return the sub-issue model registered for ``predicted_issue``.

    Args:
        predicted_issue: issue category label to look up.

    Returns:
        The entry of the module-level ``issue_models`` dict for that issue.

    Raises:
        ValueError: if ``predicted_issue`` is not one of the known labels.
    """
    # (issue label, key into ``issue_models``), checked in order.
    issue_to_model_key = (
        ("Problem with a company's investigation into an existing problem", 'trained_model_investigation_wrt_problem'),
        ("Problem with a credit reporting company's investigation into an existing problem", 'trained_model_credit_investigation_wrt_problem'),
        ("Problem with a company's investigation into an existing issue", 'trained_model_investigation_wrt_issue'),
        ("Problem with a purchase shown on your statement", 'trained_model_purchase_shown'),
        ("Incorrect information on your report", 'trained_model_incorrect_info'),
        ("Improper use of your report", 'trained_model_improper_use_report'),
        ("Account Operations and Unauthorized Transaction Issues", 'trained_model_account_operations'),
        ("Payment and Funds Management", 'trained_model_payment_funds'),
        ("Managing an account", 'trained_model_managing_account'),
        ("Attempts to collect debt not owed", 'trained_model_collect_debt'),
        ("Written notification about debt", 'trained_model_notification_about_debt'),
        ("Dealing with your lender or servicer", 'trained_model_lender'),
        ("Disputes and Misrepresentations", 'trained_model_disputes'),
        ("Closing your account", 'trained_model_closing_your_account'),
        ("Closing an account", 'trained_model_closing_account'),
        ("Credit Report and Monitoring Issues", 'trained_model_credit_report'),
        ("Legal and Threat Actions", 'trained_model_legal_and_threat'),
    )
    for issue_label, model_key in issue_to_model_key:
        if predicted_issue == issue_label:
            # Looked up only on a match, so unknown labels never touch the dict.
            return issue_models[model_key]
    raise ValueError("Invalid predicted issue category")
# Custom Headers for enhancing UI Text elements
def custom_header(text, level=1):
    """Render ``text`` as a centred, coloured HTML heading via ``st.markdown``.

    Args:
        text: heading text, interpolated into the HTML as-is.
        level: 1 renders an ``<h1>`` preceded by the CFPB logo image;
            2 and 3 render ``<h2>``/``<h3>``; 4 and 5 both render ``<h5>``
            (in different colours). Any other value renders nothing.

    Returns:
        None.
    """
    if level == 1:
        icon_url = "https://cfpb.github.io/design-system/images/uploads/logo_vertical_071720.png"
        # Adjust the img style as needed (e.g., height, vertical alignment, margin)
        banner_lines = [
            "",
            '<h1 style="text-align: center;">',
            f'<img src="{icon_url}" alt="Icon" style="vertical-align: middle; height: 112px; margin-right: -160px;">',
            f"""<span style="color: #008000; font-family: 'Sans Serif';">{text}</span>""",
            "</h1>",
            "",
        ]
        st.markdown("\n".join(banner_lines), unsafe_allow_html=True)
        return

    # (level, HTML tag, text colour) for the plain headings.
    plain_headings = (
        (2, "h2", "#00749C"),
        (3, "h3", "#00749C"),
        (4, "h5", "#00749C"),
        (5, "h5", "#f63366"),
    )
    for heading_level, tag, colour in plain_headings:
        if level == heading_level:
            st.markdown(
                f"<{tag} style='text-align: center; color: {colour}; font-family: sans serif;'>{text}</{tag}>",
                unsafe_allow_html=True,
            )
            return
# Helper function for classifying the complaint
def classify_complaint(narrative):
    """Classify one complaint narrative into product, sub-product, issue and sub-issue.

    The product and issue labels come from the two transformer pipelines; each
    label then selects a dedicated model whose ``predict`` yields the
    sub-category.

    Args:
        narrative: complaint text.

    Returns:
        dict with keys ``"Product"``, ``"Sub-product"``, ``"Issue"`` and
        ``"Sub-issue"``.

    Raises:
        ValueError: propagated from the model selectors when a predicted label
            has no dedicated sub-model.
    """
    labels = {}

    # Product first, then the sub-product model chosen by that product.
    labels["Product"] = product_classifier(narrative)[0]['label']
    labels["Sub-product"] = select_subproduct_model(labels["Product"]).predict([narrative])[0]

    # Issue next, then the sub-issue model chosen by that issue.
    labels["Issue"] = issue_classifier(narrative)[0]['label']
    labels["Sub-issue"] = select_subissue_model(labels["Issue"]).predict([narrative])[0]

    return labels
# Helper function to display key insights
def plot_eda_charts(level):
    """Build the Key Insights chart numbered ``level`` from the module-level ``df``.

    Args:
        level: chart number, 1 through 8.

    Returns:
        Whatever the matching plotting helper returns, or ``None`` when
        ``level`` is not one of 1-8.
    """
    # Deferred calls so only the requested chart is built.
    chart_builders = {
        1: lambda: complaints_by_year(df),
        2: lambda: complaints_across_states(df),
        3: lambda: plot_top_5_products(df),
        4: lambda: plot_top_5_issues(df),
        5: lambda: plot_top_5_issues_in_product(df),
        6: lambda: plot_top_10_companies_complaints(df),
        7: lambda: plot_top_10_states_most_complaints(df),
        8: lambda: plot_top_10_states_least_complaints(df),
    }
    build_chart = chart_builders.get(level)
    if build_chart is None:
        return None
    return build_chart()
# Navigation setup: sidebar menu whose selection decides which page is rendered below.
with st.sidebar:
    selected = option_menu(menu_title="Navigate",
                           options=["Home", "Key Insights", "Complaint Classifier"],
                           default_index=0)

# Home Page
if selected == "Home":
    custom_header('CFPB Consumer Complaint Insights', level=1)

    # Introduction
    st.markdown("""
<div style='text-align: center; color: #333; font-size: 20px;'>
<p><strong>Uncover Consumer Trends and Automate Complaint Categorization with CFPB Insights</strong></p>
</div>
""", unsafe_allow_html=True)
    st.write("\n")

    # Project Motivation
    st.markdown("""
### :orange[Motivation]
Consumers can face challenges with financial products and services, leading to complaints that may not always be resolved directly with financial institutions. The **Consumer Financial Protection Bureau (CFPB)** acts as a mediator in these scenarios. However, consumers often struggle to categorize their complaints accurately, leading to inefficiencies in the resolution process. Our project aims to **facilitate faster resolution** by automatically categorizing complaints based on narrative descriptions, enhancing the efficiency of complaint management.
""", unsafe_allow_html=True)

    # Impact
    st.markdown("""
### :green[Impact]
The implementation of our project has two primary impacts:
- **Ease for Consumers:** Automates the tagging of complaints into appropriate categories, reducing the need for consumers to understand complex financial product categories.
- **Industry Adoption:** Offers a streamlined approach to complaint handling that can be adopted by financial institutions beyond the CFPB, promoting consistency across the industry.
""", unsafe_allow_html=True)

    # Complaint Classifier
    st.markdown("""
#### :blue[Complaint Classifier]
Our dashboard features an innovative :rainbow[**Complaint Classifier**] that utilizes the narrative descriptions provided by consumers to categorize complaints into the correct product, issue, and sub-issue categories. This tool simplifies the submission process for consumers and enhances the efficiency of complaint resolution.
""", unsafe_allow_html=True)

# Key Insights Page
elif selected == "Key Insights":
    # headers[k] is the title of the chart produced by plot_eda_charts(level=k + 1).
    headers = ["Evolution of complaints across years", "Complaints across US states",
               "Top 5 Common Product Categories", "Top 5 Common Issue Categories",
               "Top 5 Issues in Each Product Category", "Top 10 Companies with Most Complaints in 2023",
               "Top 10 states with Most Complaints", "Top 10 states with Least Complaints"]
    custom_header("Key Insights", level=1)
    st.write("\n")
    st.write("\n")
    st.write("\n")

    # Lay the charts out two per row.
    for row_start in range(0, len(headers), 2):
        cols = st.columns(2)  # Create two columns
        for offset, column in enumerate(cols):
            chart_index = row_start + offset
            if chart_index >= len(headers):
                break  # odd number of charts: leave the last cell empty
            with column:
                custom_header(headers[chart_index], level=4)
                fig = plot_eda_charts(level=chart_index + 1)
                st.plotly_chart(fig, use_container_width=True)

# Complaints Classifier Page
elif selected == "Complaint Classifier":
    custom_header("Complaint Classifier", level=2)
    st.write("\n")

    # Using a key for the text_area widget to reference its current value
    query = st.text_area("Enter your complaint:", placeholder="It is absurd that I have consistently made timely payments for this account and have never been overdue. I kindly request that you promptly update my account to reflect this accurately.", key="input_text")

    if st.button("Classify Complaint"):
        if query.strip():  # Check if the input is not empty
            with st.spinner("Classifying Complaint..."):
                try:
                    result = classify_complaint(query)
                except ValueError:
                    # The model selectors raise ValueError for a predicted label
                    # they have no sub-model for; report it as a failed
                    # classification instead of showing a traceback.
                    result = None

            if result:  # Check if the result is not empty
                st.success("Complaint Classification Results:")
                # Using HTML for better control over formatting
                st.markdown(f"""
**Product:** :blue[{result.get("Product")}]<br>
**Sub-product:** :green[{result.get("Sub-product")}]<br>
**Issue:** :red[{result.get("Issue")}]<br>
**Sub-issue:** :orange[{result.get("Sub-issue")}]<br>
""", unsafe_allow_html=True)
                st.write("\n\n")
                st.header("", divider='rainbow')
                st.balloons()  # Celebratory balloons on successful classification only
            else:
                st.error("Failed to classify the complaint. Please try again.")
        else:
            st.info("Please enter a complaint to classify.")