#Importing the necessary libraries import pandas as pd import torch from streamlit_option_menu import option_menu from plotting_helpers import (plot_top_5_products, plot_top_5_issues, plot_top_5_issues_in_product, plot_top_10_companies_complaints, plot_top_10_states_most_complaints, plot_top_10_states_least_complaints, complaints_by_year, complaints_across_states) from transformers import pipeline import streamlit as st import pickle import warnings warnings.filterwarnings("ignore") # Setting page config st.set_page_config(page_title='CFPB Consumer Complaint Insights', page_icon='📋', layout="wide", initial_sidebar_state='expanded') @st.cache_data(show_spinner=False) def load_process_data(): df = pd.read_csv('complaints.csv') df['Date received'] = pd.to_datetime(df['Date received']) cols_to_consider = ['Product','Sub-product','Issue','Sub-issue','Consumer complaint narrative','Company public response','Company', 'State', 'ZIP code', 'Date received'] df_new = df[cols_to_consider] df_new = df_new.dropna() product_map = {'Credit reporting or other personal consumer reports' : 'Credit Reporting', 'Credit reporting, credit repair services, or other personal consumer reports' : 'Credit Reporting', 'Payday loan, title loan, personal loan, or advance loan' : 'Loans / Mortgage', 'Payday loan, title loan, or personal loan' : 'Loans / Mortgage', 'Student loan' : 'Loans / Mortgage', 'Vehicle loan or lease' : 'Loans / Mortgage', 'Debt collection' : 'Debt collection', 'Credit card or prepaid card' : 'Credit/Prepaid Card', 'Credit card' : 'Credit/Prepaid Card', 'Prepaid card' : 'Credit/Prepaid Card', 'Mortgage' : 'Loans / Mortgage', 'Checking or savings account' : 'Checking or savings account' } df_new.loc[:,'Product'] = df_new['Product'].map(product_map) df_new['complaint length'] = df_new['Consumer complaint narrative'].apply(lambda x : len(x)) df_new = df_new[df_new['complaint length'] > 20] complaints_to_exclude = ['See document attached', 'See the attached documents.', 'Incorrect information on my credit report', 'incorrect information on my credit report', 'please see attached file','Please see documents Attached','Incorrect information on my credit report.', 'Please see attached file', 'see attached', 'See attached', 'SEE ATTACHED DOCUMENTS', 'See Attached', 'SEE ATTACHMENT', 'SEE ATTACHMENTS', 'XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX'] df_new = df_new[~df_new['Consumer complaint narrative'].isin(complaints_to_exclude)] return df_new # Load the processed data df = load_process_data() # Loading the product classifier model device = "mps" if torch.backends.mps.is_available() else "cpu" # Initialize the pipeline for classifying product product_classifier = pipeline("text-classification", model="Mahesh9/distil-bert-fintuned-product-cfpb-complaints", max_length = 512, truncation = True, device = device) # Load sub-product classifier models with open('subproduct_prediction/models/Credit_Reporting_model.pkl', 'rb') as f: trained_model_cr= pickle.load(f) with open('subproduct_prediction/models/Credit_Prepaid_Card_model.pkl', 'rb') as f: trained_model_cp= pickle.load(f) with open('subproduct_prediction/models/Checking_saving_model.pkl', 'rb') as f: trained_model_cs=pickle.load(f) with open('subproduct_prediction/models/loan_model.pkl', 'rb') as f: trained_model_l= pickle.load(f) with open('subproduct_prediction/models/Debt_model.pkl', 'rb') as f: trained_model_d= pickle.load(f) @st.cache_resource(show_spinner=False) # Define a function to select the appropriate subproduct prediction model based on the predicted product def select_subproduct_model(predicted_product): if predicted_product == 'Credit Reporting' : return trained_model_cr elif predicted_product == 'Credit/Prepaid Card': return trained_model_cp elif predicted_product == 'Checking or savings account': return trained_model_cs elif predicted_product == 'Loans / Mortgage': return trained_model_l elif predicted_product == 'Debt collection': return trained_model_d else: raise ValueError("Invalid predicted product category") # Loading the issue classifier model issue_classifier = pipeline("text-classification", model="Mahesh9/distil-bert-fintuned-issues-cfpb-complaints", max_length = 512, truncation = True, device = device) # Path to the models and their corresponding names issue_model_files = { 'trained_model_account_operations': 'subproduct_prediction/issue_models/account_operations_and_unauthorized_transaction_issues.pkl', 'trained_model_collect_debt': 'subproduct_prediction/issue_models/attempts_to_collect_debt_not_owed.pkl', 'trained_model_closing_account': 'subproduct_prediction/issue_models/closing_an_account.pkl', 'trained_model_closing_your_account': 'subproduct_prediction/issue_models/closing_your_account.pkl', 'trained_model_credit_report': 'subproduct_prediction/issue_models/credit_report_and_monitoring_issues.pkl', 'trained_model_lender': 'subproduct_prediction/issue_models/dealing_with_your_lender_or_servicer.pkl', 'trained_model_disputes': 'subproduct_prediction/issue_models/disputes_and_misrepresentations.pkl', 'trained_model_improper_use_report': 'subproduct_prediction/issue_models/improper_use_of_your_report.pkl', 'trained_model_incorrect_info': 'subproduct_prediction/issue_models/incorrect_information_on_your_report.pkl', 'trained_model_legal_and_threat': 'subproduct_prediction/issue_models/legal_and_threat_actions.pkl', 'trained_model_managing_account': 'subproduct_prediction/issue_models/managing_an_account.pkl', 'trained_model_payment_funds': 'subproduct_prediction/issue_models/payment_and_funds_management.pkl', 'trained_model_investigation_wrt_issue': 'subproduct_prediction/issue_models/problem_with_a_company\'s_investigation_into_an_existing_issue.pkl', 'trained_model_investigation_wrt_problem': 'subproduct_prediction/issue_models/problem_with_a_company\'s_investigation_into_an_existing_problem.pkl', 'trained_model_credit_investigation_wrt_problem': 'subproduct_prediction/issue_models/problem_with_a_credit_reporting_company\'s_investigation_into_an_existing_problem.pkl', 'trained_model_purchase_shown': 'subproduct_prediction/issue_models/problem_with_a_purchase_shown_on_your_statement.pkl', 'trained_model_notification_about_debt': 'subproduct_prediction/issue_models/written_notification_about_debt.pkl', } issue_models = {} for model_name, file_path in issue_model_files.items(): with open(file_path, 'rb') as f: issue_models[model_name] = pickle.load(f) # Define a function to select the appropriate subissue prediction model based on the predicted issue def select_subissue_model(predicted_issue): if predicted_issue == "Problem with a company's investigation into an existing problem": return issue_models['trained_model_investigation_wrt_problem'] elif predicted_issue == "Problem with a credit reporting company's investigation into an existing problem": return issue_models['trained_model_credit_investigation_wrt_problem'] elif predicted_issue == "Problem with a company's investigation into an existing issue": return issue_models['trained_model_investigation_wrt_issue'] elif predicted_issue == "Problem with a purchase shown on your statement": return issue_models['trained_model_purchase_shown'] elif predicted_issue == "Incorrect information on your report": return issue_models['trained_model_incorrect_info'] elif predicted_issue == "Improper use of your report": return issue_models['trained_model_improper_use_report'] elif predicted_issue == "Account Operations and Unauthorized Transaction Issues": return issue_models['trained_model_account_operations'] elif predicted_issue == "Payment and Funds Management": return issue_models['trained_model_payment_funds'] elif predicted_issue == "Managing an account": return issue_models['trained_model_managing_account'] elif predicted_issue == "Attempts to collect debt not owed": return issue_models['trained_model_collect_debt'] elif predicted_issue == "Written notification about debt": return issue_models['trained_model_notification_about_debt'] elif predicted_issue == "Dealing with your lender or servicer": return issue_models['trained_model_lender'] elif predicted_issue == "Disputes and Misrepresentations": return issue_models['trained_model_disputes'] elif predicted_issue == "Closing your account": return issue_models['trained_model_closing_your_account'] elif predicted_issue == "Closing an account": return issue_models['trained_model_closing_account'] elif predicted_issue == "Credit Report and Monitoring Issues": return issue_models['trained_model_credit_report'] elif predicted_issue == "Legal and Threat Actions": return issue_models['trained_model_legal_and_threat'] else: raise ValueError("Invalid predicted issue category") # Custom Headers for enhancing UI Text elements def custom_header(text, level=1): if level == 1: icon_url = "https://cfpb.github.io/design-system/images/uploads/logo_vertical_071720.png" # Adjust the img style as needed (e.g., height, vertical alignment, margin) st.markdown(f"""
Uncover Consumer Trends and Automate Complaint Categorization with CFPB Insights