{ "cells": [ { "cell_type": "markdown", "id": "299ffd7f-502b-4183-9536-4e47654baae8", "metadata": { "id": "299ffd7f-502b-4183-9536-4e47654baae8" }, "source": [ "#### Importing the necessary libraries" ] }, { "cell_type": "code", "execution_count": 1, "id": "e27f22a3-f39e-4007-a048-56ccc9af915e", "metadata": { "id": "e27f22a3-f39e-4007-a048-56ccc9af915e" }, "outputs": [], "source": [ "import torch\n", "import pickle\n", "import pandas as pd\n", "from tqdm import tqdm\n", "from sklearn.pipeline import Pipeline\n", "from transformers import pipeline\n", "from sklearn.metrics import accuracy_score, precision_score" ] }, { "cell_type": "markdown", "id": "7b6553e4-339a-4003-b6f9-4aa52d2818c0", "metadata": { "id": "7b6553e4-339a-4003-b6f9-4aa52d2818c0" }, "source": [ "#### Loading 5 product models" ] }, { "cell_type": "code", "execution_count": 2, "id": "bd40a9e0-faab-4999-9ad5-f74e7ae8b272", "metadata": { "id": "bd40a9e0-faab-4999-9ad5-f74e7ae8b272" }, "outputs": [], "source": [ "with open('models/Credit_Reporting_model.pkl', 'rb') as f:\n", " trained_model_cr= pickle.load(f)\n", "\n", "with open('models/Credit_Prepaid_Card_model.pkl', 'rb') as f:\n", " trained_model_cp= pickle.load(f)\n", "\n", "with open('models/Checking_saving_model.pkl', 'rb') as f:\n", " trained_model_cs=pickle.load(f)\n", "\n", "with open('models/loan_model.pkl', 'rb') as f:\n", " trained_model_l= pickle.load(f)\n", "\n", "with open('models/Debt_model.pkl', 'rb') as f:\n", " trained_model_d= pickle.load(f)" ] }, { "cell_type": "markdown", "id": "8dd19c5a-5e4f-457c-88b7-5efa18964a8b", "metadata": { "id": "8dd19c5a-5e4f-457c-88b7-5efa18964a8b" }, "source": [ "#### Loading 17 issue models" ] }, { "cell_type": "code", "execution_count": 3, "id": "3dae2131-cfa4-4887-a30a-00d6caf547e8", "metadata": { "id": "3dae2131-cfa4-4887-a30a-00d6caf547e8" }, "outputs": [], "source": [ "# Path to the models and their corresponding names\n", "issue_model_files = {\n", " 'trained_model_account_operations': 'issue_models/account_operations_and_unauthorized_transaction_issues.pkl',\n", " 'trained_model_collect_debt': 'issue_models/attempts_to_collect_debt_not_owed.pkl',\n", " 'trained_model_closing_account': 'issue_models/closing_an_account.pkl',\n", " 'trained_model_closing_your_account': 'issue_models/closing_your_account.pkl',\n", " 'trained_model_credit_report': 'issue_models/credit_report_and_monitoring_issues.pkl',\n", " 'trained_model_lender': 'issue_models/dealing_with_your_lender_or_servicer.pkl',\n", " 'trained_model_disputes': 'issue_models/disputes_and_misrepresentations.pkl',\n", " 'trained_model_improper_use_report': 'issue_models/improper_use_of_your_report.pkl',\n", " 'trained_model_incorrect_info': 'issue_models/incorrect_information_on_your_report.pkl',\n", " 'trained_model_legal_and_threat': 'issue_models/legal_and_threat_actions.pkl',\n", " 'trained_model_managing_account': 'issue_models/managing_an_account.pkl',\n", " 'trained_model_payment_funds': 'issue_models/payment_and_funds_management.pkl',\n", " 'trained_model_investigation_wrt_issue': 'issue_models/problem_with_a_company\\'s_investigation_into_an_existing_issue.pkl',\n", " 'trained_model_investigation_wrt_problem': 'issue_models/problem_with_a_company\\'s_investigation_into_an_existing_problem.pkl',\n", " 'trained_model_credit_investigation_wrt_problem': 'issue_models/problem_with_a_credit_reporting_company\\'s_investigation_into_an_existing_problem.pkl',\n", " 'trained_model_purchase_shown': 'issue_models/problem_with_a_purchase_shown_on_your_statement.pkl',\n", " 'trained_model_notification_about_debt': 'issue_models/written_notification_about_debt.pkl',\n", "}\n", "\n", "issue_models = {}\n", "\n", "for model_name, file_path in issue_model_files.items():\n", " with open(file_path, 'rb') as f:\n", " issue_models[model_name] = pickle.load(f)" ] }, { "cell_type": "markdown", "id": "bf41b143-2ff3-4a79-83a9-afcc0d352dd0", "metadata": { "id": "bf41b143-2ff3-4a79-83a9-afcc0d352dd0", "jp-MarkdownHeadingCollapsed": true }, "source": [ "#### LLM to classify the product based on the narrative" ] }, { "cell_type": "code", "execution_count": 4, "id": "b946427b-b259-4eb2-a40b-ed7b7e476354", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 333, "referenced_widgets": [ "32e76bbe19b04f3388d6f66b2bb4b209", "ea53d18296fd439aba0587975b3560e0", "ff91165299b0498a8ccfe0cb0fb798ff", "ef388f86954b4cdb9709729e58e74683", "4b0309b321c24f13a7b108c6430cfadb", "77bdf8fb1f3a4550a32832708d01e2a3", "f78c3194c2c04ccfb7836f76d890ec0b", "6560b88ddcfa4422aef9a3d6181722fe", "dab563db420e40ee9674cda3de4e05f4", "0f345e902401423095adf1a797e37abf", "5aa3399301c34a9db067e64e928d080d", "f1d67c0aabb245f08376df9df3371dc5", "5ce1032e4f614341891c2f19e05599c2", "6c5eab8b0f2f4631ace4c772526ba312", "25927d682fb749738d82510938ea4dd7", "a06e1f69a7ba4173be0b97b4e92e6fe0", "afe123fd98484e2db408f5d7f64ede3e", "ad1b1a4514c84051a003dcddd0f19d36", "7dd8a6d910354907a847d4e2c982862c", "e082145350c9458f8bee08d68085b48a", "22c041b8409041c9b6199185531354e4", "19cb19180da440cf9f08b12b5ebab5ef", "19575eeb509e4815a41b9efcd7a0696b", "5d9aea50a8e2495d8ee1d303768738f3", "d432e1b3cb02425e808a3a3a99bf6809", "b52e8c6a28874986982c7d4cda809766", "61d78ac570f4459b8db16027555c83b3", "0a63f5147ff34de38f6e70a76989471d", "bf1814ad11f24f039f4ac098e790432f", "b1e630232c0a46c2a3ff14b03f1d8d97", "90e53eff07a34ccf8a1f6605ae33f9da", "fae74e6b5acc4a149439054534b63438", "d28d3869095b47289dd3348379ce730c", "0931ad4639fc40dd9ee0955b44fc04b6", "ab179d3c37a34970ba2f28193ef9125d", "0bb46f9442124cb3b802ed44793085c7", "7e64bf95f1f041a7970d419b14a995ad", "8cd1cc16cf744382bd9c2b1ebc10389d", "a1c0441906c34ed898a93abcd09a113f", "851af7893cfd4557a2f87403ee2d4b45", "5e39ee0442694aafa5b1081e1a0c9bb3", "e8c3a8451f374a05b37de0af49e4758d", "38add6305c0b47e0a4acf973664ea075", "399d52acab08475c8882e5ef97c963c5", "e4f6ea85600941beae695d1f3a64901b", "4f7046b2ae5e4a4f9fdb0d37c7faa121", "b25f3dcc4eaf4212a64a03432918c288", "52ef0cac021943659eee11c6602d98c2", "5540f446f1c845739f39ae8ef226dfd8", "dd7d680bc32c4a2ea5f5eb014ed329e6", "52d15f2aa0124a948ebeff07ac49c2c6", "1643c88f33f14f9082e162352de1778c", "c8775e4060994e1cb51551daacb4a959", "738eabcb160046bc8f9c1ef8999b1791", "769290cfdac44ad8ae8a4b5e4c9ded49", "9362d4245a8543ed81e46a1f03bc79b1", "099e4e2a07e24472bbea46ab869f2aab", "b1428ee09bb84364b0f9e12bb1c7820d", "bf56bff723a54d9c853b92b0bfd9b764", "e8fb649d0dd74efe99098910468fa11b", "0e009d6b18a44aa3bd6988412496a881", "0e83b89891d0472cbe60a06cea61e110", "ccd70fef463e4231a1252c669e928f0e", "eabc681b067f4cda8ea92bacada2916c", "66396dda9e404bdcae7f3805d1d50eba", "a201302105b1467784c2c73779d61b64" ] }, "id": "b946427b-b259-4eb2-a40b-ed7b7e476354", "outputId": "2e051817-f883-4873-ce39-83682533642b" }, "outputs": [], "source": [ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "# Define the pipeline for classifying product\n", "product_classifier = pipeline(\"text-classification\", model=\"Mahesh9/distil-bert-fintuned-product-cfpb-complaints\",\n", " max_length = 512, truncation = True, device = device)" ] }, { "cell_type": "markdown", "id": "0f0c40cd-f23e-4e0a-8c03-34b517a4c727", "metadata": { "id": "0f0c40cd-f23e-4e0a-8c03-34b517a4c727", "jp-MarkdownHeadingCollapsed": true }, "source": [ "#### Function to choose the appropriate product model to classify the sub-product" ] }, { "cell_type": "code", "execution_count": 5, "id": "619d9c58-1a83-4279-b452-63f3cb69998f", "metadata": { "id": "619d9c58-1a83-4279-b452-63f3cb69998f" }, "outputs": [], "source": [ "# Define a function to select the appropriate subproduct prediction model based on the predicted product\n", "def select_subproduct_model(predicted_product):\n", " if predicted_product == 'Credit Reporting' :\n", " return trained_model_cr\n", " elif predicted_product == 'Credit/Prepaid Card':\n", " return trained_model_cp\n", " elif predicted_product == 'Checking or savings account':\n", " return trained_model_cs\n", " elif predicted_product == 'Loans / Mortgage':\n", " return trained_model_l\n", " elif predicted_product == 'Debt collection':\n", " return trained_model_d\n", " else:\n", " raise ValueError(\"Invalid predicted product category\")" ] }, { "cell_type": "markdown", "id": "2f361468-ab6d-4d9a-a665-2c9dbce42e93", "metadata": { "id": "2f361468-ab6d-4d9a-a665-2c9dbce42e93" }, "source": [ "#### LLM to classify the issue based on the narrative" ] }, { "cell_type": "code", "execution_count": 6, "id": "0a8da273-8dfb-43b8-abf9-cf06871f2763", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 209, "referenced_widgets": [ "f38674633a614d2db04b5a1b63926fd3", "58721cdb25d14ac69d361fa9e0626d3b", "463da2aaa5854aa681945094635dc62c", "207ea2850e1349ccb9dd983734675593", "fca3cc8d9c0c42a9b370da6863c2205a", "8772c8bcb37a4d24ac178f1cafd81fb7", "ec2cc83997b4425d8f4c9af02cefb9dd", "1e3cf5304434412ebbb572cc17f641d8", "a98cf2410cb647bfa6136fdb63946ebb", "961e726ab3324a2ab77488ad34352e32", "883b89e3b4564583a66ebc0ca94f1e76", "4535cc65da194ed294769fc82bd90bd7", "0a34807b93954d46a4ebf61c209f74f5", "378a83b9489441c7b709e13c89827ba9", "7883bc7f255942d6a77afa8cb22f721e", "9b1351b04859480185bc723443e1ab1d", "e547d7f107034d60a4f4ce6d1266dd44", "1b2e7586cfab43b080e9871a511ec14d", "0b1da0fd31b942cda52be9ca2a7cbeb3", "7b22670fb65a4b24a0052cbd43b7e39d", "b8b2f8edda234c15a4667742898a81f9", "43c6d5a2dd2c4d67961729763b5018cb", "5f61907e18804bdba1a0f48a2b51992a", "00351f920d0d44209ded19347c943574", "bf2961ffb42645d59e1c36d4fce01cff", "6f75b09ecff347309c0c0babf75797d9", "898dd39c2be24f37b7b7337bed361494", "63832e563c3b4c9a90ce93a1a54fc6e0", "0021fe130e90449cbd96c29b115563ba", "1ed9f33276e3490ea9935892eec38022", "157a3896549e41fb86cc2bbda1907611", "80efe9fa49474eb88265e74ac799494c", "d1eec8d4515447cd813710a9e360cfca", "42e4760025154848b006ceedda2ccea3", "9fb602f9cfb84665871e6faabfeb3055", "2e03ec364c924ef69871758e5e08cd11", "0e1f85ab17464a44ae2ce57545e3639f", "562bb1a90ebe41ec86b036971fefd84d", "3adafd4730aa49f1bfb31dcf5a755603", "be5aea5ddb694623821d2204f7f46dc3", "0f3d34788db5403b8791adc406d11a4e", "76294d6c41fc4fecb449fbaba5aa7938", "33b8b8b2d9d3463ead7c1ca35591e313", "3aba5a6aa0514c12ba5677c2b56b1d2d", "880428334b0844cb850196c5a9882408", "a310c427bafb4bcab1c62dfe2851e83b", "b6fcee1aa08542a4866aa2c1fc3c73ff", "a1e1352c4a3f4001a339ede22794ca91", "e2746dfe70114294be4abb3a45fe7628", "2e721ce19bc5433ea04a687c83d77e66", "1227ce5df92b421989b657354c4ccc05", "9bbe610697624d899962feb1ffa683bd", "ea17ff9b5d634171baecd38dafa1ef42", "c167d4351c1e44538a7bb040d00fe018", "a393fecf91944fedb1b1419ce48f3b85", "ae6c8b1e77e6465183d418b254742c79", "0a3ded93837a4d2f9715cb6e2153b5de", "2aeb553c30e64b4b95019a906cc9842d", "5b4e954e1d9f40b789d4db3d0bd909c8", "c4cb4cea0b9c49f6ad1aa33320343fd8", "9420f56e109f4f00a19f397baae56fed", "48464a6b67aa46b2b09d05757a77fbd4", "ab5ca9fcbde94469a16a355fcddccde2", "68e410ec912f40f39c122cf511b23152", "22d6ae0e16d1443987a3319a298159ef", "a71baf8630f548adac12893bc63be69a" ] }, "id": "0a8da273-8dfb-43b8-abf9-cf06871f2763", "outputId": "6551f092-b9a2-4574-b270-94a3cf78a21b" }, "outputs": [], "source": [ "# Define the pipeline for classifying issue\n", "issue_classifier = pipeline(\"text-classification\", model=\"Mahesh9/distil-bert-fintuned-issues-cfpb-complaints\",\n", " max_length = 512, truncation = True, device = device)" ] }, { "cell_type": "markdown", "id": "df05c0c0-c4cc-4287-b129-75f60dd88348", "metadata": { "id": "df05c0c0-c4cc-4287-b129-75f60dd88348" }, "source": [ "#### Function to choose the appropriate issue model to classify the sub-issue" ] }, { "cell_type": "code", "execution_count": 7, "id": "f55a787b-ce6a-49dd-96dd-1cbfda8a68a5", "metadata": { "id": "f55a787b-ce6a-49dd-96dd-1cbfda8a68a5" }, "outputs": [], "source": [ "# Define a function to select the appropriate subissue prediction model based on the predicted issue\n", "def select_subissue_model(predicted_issue):\n", " if predicted_issue == \"Problem with a company's investigation into an existing problem\":\n", " return issue_models['trained_model_investigation_wrt_problem']\n", "\n", " elif predicted_issue == \"Problem with a credit reporting company's investigation into an existing problem\":\n", " return issue_models['trained_model_credit_investigation_wrt_problem']\n", "\n", " elif predicted_issue == \"Problem with a company's investigation into an existing issue\":\n", " return issue_models['trained_model_investigation_wrt_issue']\n", "\n", " elif predicted_issue == \"Problem with a purchase shown on your statement\":\n", " return issue_models['trained_model_purchase_shown']\n", "\n", " elif predicted_issue == \"Incorrect information on your report\":\n", " return issue_models['trained_model_incorrect_info']\n", "\n", " elif predicted_issue == \"Improper use of your report\":\n", " return issue_models['trained_model_improper_use_report']\n", "\n", " elif predicted_issue == \"Account Operations and Unauthorized Transaction Issues\":\n", " return issue_models['trained_model_account_operations']\n", "\n", " elif predicted_issue == \"Payment and Funds Management\":\n", " return issue_models['trained_model_payment_funds']\n", "\n", " elif predicted_issue == \"Managing an account\":\n", " return issue_models['trained_model_managing_account']\n", "\n", " elif predicted_issue == \"Attempts to collect debt not owed\":\n", " return issue_models['trained_model_collect_debt']\n", "\n", " elif predicted_issue == \"Written notification about debt\":\n", " return issue_models['trained_model_notification_about_debt']\n", "\n", " elif predicted_issue == \"Dealing with your lender or servicer\":\n", " return issue_models['trained_model_lender']\n", "\n", " elif predicted_issue == \"Disputes and Misrepresentations\":\n", " return issue_models['trained_model_disputes']\n", "\n", " elif predicted_issue == \"Closing your account\":\n", " return issue_models['trained_model_closing_your_account']\n", "\n", " elif predicted_issue == \"Closing an account\":\n", " return issue_models['trained_model_closing_account']\n", "\n", " elif predicted_issue == \"Credit Report and Monitoring Issues\":\n", " return issue_models['trained_model_credit_report']\n", "\n", " elif predicted_issue == \"Legal and Threat Actions\":\n", " return issue_models['trained_model_legal_and_threat']\n", "\n", " else:\n", " raise ValueError(\"Invalid predicted issue category\")" ] }, { "cell_type": "markdown", "id": "d87974e1-1bf8-44ea-bfee-75de8e2960b4", "metadata": { "id": "d87974e1-1bf8-44ea-bfee-75de8e2960b4" }, "source": [ "#### Driver code to classify the complaint into various categories" ] }, { "cell_type": "code", "execution_count": 8, "id": "dc785511-d68f-4341-a080-23f8f27eefc4", "metadata": { "id": "dc785511-d68f-4341-a080-23f8f27eefc4" }, "outputs": [], "source": [ "def classify_complaint(narrative):\n", " # Predict product category\n", " predicted_product = product_classifier(narrative)[0]['label']\n", "\n", " # Load the appropriate subproduct prediction model\n", " subproduct_model = select_subproduct_model(predicted_product)\n", " # Predict subproduct category using the selected model\n", " predicted_subproduct = subproduct_model.predict([narrative])[0]\n", "\n", "\n", "\n", " # Predict the appropriate issue category using the narrative\n", " predicted_issue = issue_classifier(narrative)[0]['label']\n", "\n", " # Load the appropriate subissue prediction model\n", " subissue_model = select_subissue_model(predicted_issue)\n", " # Predict subissue category using the selected model\n", " predicted_subissue = subissue_model.predict([narrative])[0]\n", "\n", " return {\n", " \"Product\" : predicted_product,\n", " \"Sub-product\" : predicted_subproduct,\n", " \"Issue\" : predicted_issue,\n", " \"Sub-issue\" : predicted_subissue\n", " }" ] }, { "cell_type": "code", "execution_count": 9, "id": "982521ea-364e-4521-889e-fe586c186701", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "982521ea-364e-4521-889e-fe586c186701", "outputId": "57634353-228e-4333-cb04-4d0fcdbe55f1" }, "outputs": [ { "data": { "text/plain": [ "{'Product': 'Credit/Prepaid Card',\n", " 'Sub-product': 'General-purpose credit card or charge card',\n", " 'Issue': \"Problem with a company's investigation into an existing problem\",\n", " 'Sub-issue': 'Was not notified of investigation status or results'}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "narrative = \"\"\"It is absurd that I have consistently made timely payments for this account and have never been\n", " overdue. I kindly request that you promptly update my account to reflect this accurately.\"\"\"\n", "\n", "classify_complaint(narrative)" ] }, { "cell_type": "code", "execution_count": 10, "id": "a80b68d8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Complaint Narrative | \n", "Actual Product | \n", "Predicted Product | \n", "Actual Sub-product | \n", "Predicted Sub-product | \n", "Actual Issue | \n", "Predicted Issue | \n", "Actual Sub-issue | \n", "Predicted Sub-issue | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "2nd and final notice Notice to agent is notice... | \n", "Credit Reporting | \n", "Credit Reporting | \n", "Credit reporting | \n", "Credit reporting | \n", "Problem with a company's investigation into an... | \n", "Improper use of your report | \n", "Was not notified of investigation status or re... | \n", "Reporting company used your report improperly | \n", "
1 | \n", "It has come to my attention that this bankrupt... | \n", "Credit Reporting | \n", "Credit Reporting | \n", "Credit reporting | \n", "Credit reporting | \n", "Incorrect information on your report | \n", "Incorrect information on your report | \n", "Information belongs to someone else | \n", "Information belongs to someone else | \n", "
2 | \n", "per 15 1666B no lates should be furnished per ... | \n", "Credit Reporting | \n", "Credit Reporting | \n", "Credit reporting | \n", "Credit reporting | \n", "Incorrect information on your report | \n", "Improper use of your report | \n", "Account status incorrect | \n", "Reporting company used your report improperly | \n", "
3 | \n", "XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX ... | \n", "Credit Reporting | \n", "Credit Reporting | \n", "Credit reporting | \n", "Credit reporting | \n", "Problem with a credit reporting company's inve... | \n", "Improper use of your report | \n", "Investigation took more than 30 days | \n", "Reporting company used your report improperly | \n", "
4 | \n", "On XX/XX/2022 I sent a letter disputing a bill... | \n", "Credit/Prepaid Card | \n", "Credit Reporting | \n", "General-purpose credit card or charge card | \n", "Credit reporting | \n", "Problem with a purchase shown on your statement | \n", "Attempts to collect debt not owed | \n", "Credit card company isn't resolving a dispute ... | \n", "Debt is not yours | \n", "