{ "cells": [ { "cell_type": "markdown", "id": "299ffd7f-502b-4183-9536-4e47654baae8", "metadata": { "id": "299ffd7f-502b-4183-9536-4e47654baae8" }, "source": [ "#### Importing the necessary libraries" ] }, { "cell_type": "code", "execution_count": 1, "id": "e27f22a3-f39e-4007-a048-56ccc9af915e", "metadata": { "id": "e27f22a3-f39e-4007-a048-56ccc9af915e" }, "outputs": [], "source": [ "import torch\n", "import pickle\n", "import pandas as pd\n", "from tqdm import tqdm\n", "from sklearn.pipeline import Pipeline\n", "from transformers import pipeline\n", "from sklearn.metrics import accuracy_score, precision_score" ] }, { "cell_type": "markdown", "id": "7b6553e4-339a-4003-b6f9-4aa52d2818c0", "metadata": { "id": "7b6553e4-339a-4003-b6f9-4aa52d2818c0" }, "source": [ "#### Loading 5 product models" ] }, { "cell_type": "code", "execution_count": 2, "id": "bd40a9e0-faab-4999-9ad5-f74e7ae8b272", "metadata": { "id": "bd40a9e0-faab-4999-9ad5-f74e7ae8b272" }, "outputs": [], "source": [ "with open('models/Credit_Reporting_model.pkl', 'rb') as f:\n", " trained_model_cr= pickle.load(f)\n", "\n", "with open('models/Credit_Prepaid_Card_model.pkl', 'rb') as f:\n", " trained_model_cp= pickle.load(f)\n", "\n", "with open('models/Checking_saving_model.pkl', 'rb') as f:\n", " trained_model_cs=pickle.load(f)\n", "\n", "with open('models/loan_model.pkl', 'rb') as f:\n", " trained_model_l= pickle.load(f)\n", "\n", "with open('models/Debt_model.pkl', 'rb') as f:\n", " trained_model_d= pickle.load(f)" ] }, { "cell_type": "markdown", "id": "8dd19c5a-5e4f-457c-88b7-5efa18964a8b", "metadata": { "id": "8dd19c5a-5e4f-457c-88b7-5efa18964a8b" }, "source": [ "#### Loading 17 issue models" ] }, { "cell_type": "code", "execution_count": 3, "id": "3dae2131-cfa4-4887-a30a-00d6caf547e8", "metadata": { "id": "3dae2131-cfa4-4887-a30a-00d6caf547e8" }, "outputs": [], "source": [ "# Path to the models and their corresponding names\n", "issue_model_files = {\n", " 'trained_model_account_operations': 'issue_models/account_operations_and_unauthorized_transaction_issues.pkl',\n", " 'trained_model_collect_debt': 'issue_models/attempts_to_collect_debt_not_owed.pkl',\n", " 'trained_model_closing_account': 'issue_models/closing_an_account.pkl',\n", " 'trained_model_closing_your_account': 'issue_models/closing_your_account.pkl',\n", " 'trained_model_credit_report': 'issue_models/credit_report_and_monitoring_issues.pkl',\n", " 'trained_model_lender': 'issue_models/dealing_with_your_lender_or_servicer.pkl',\n", " 'trained_model_disputes': 'issue_models/disputes_and_misrepresentations.pkl',\n", " 'trained_model_improper_use_report': 'issue_models/improper_use_of_your_report.pkl',\n", " 'trained_model_incorrect_info': 'issue_models/incorrect_information_on_your_report.pkl',\n", " 'trained_model_legal_and_threat': 'issue_models/legal_and_threat_actions.pkl',\n", " 'trained_model_managing_account': 'issue_models/managing_an_account.pkl',\n", " 'trained_model_payment_funds': 'issue_models/payment_and_funds_management.pkl',\n", " 'trained_model_investigation_wrt_issue': 'issue_models/problem_with_a_company\\'s_investigation_into_an_existing_issue.pkl',\n", " 'trained_model_investigation_wrt_problem': 'issue_models/problem_with_a_company\\'s_investigation_into_an_existing_problem.pkl',\n", " 'trained_model_credit_investigation_wrt_problem': 'issue_models/problem_with_a_credit_reporting_company\\'s_investigation_into_an_existing_problem.pkl',\n", " 'trained_model_purchase_shown': 'issue_models/problem_with_a_purchase_shown_on_your_statement.pkl',\n", " 'trained_model_notification_about_debt': 'issue_models/written_notification_about_debt.pkl',\n", "}\n", "\n", "issue_models = {}\n", "\n", "for model_name, file_path in issue_model_files.items():\n", " with open(file_path, 'rb') as f:\n", " issue_models[model_name] = pickle.load(f)" ] }, { "cell_type": "markdown", "id": "bf41b143-2ff3-4a79-83a9-afcc0d352dd0", "metadata": { "id": "bf41b143-2ff3-4a79-83a9-afcc0d352dd0", "jp-MarkdownHeadingCollapsed": true }, "source": [ "#### LLM to classify the product based on the narrative" ] }, { "cell_type": "code", "execution_count": 4, "id": "b946427b-b259-4eb2-a40b-ed7b7e476354", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 333, "referenced_widgets": [ "32e76bbe19b04f3388d6f66b2bb4b209", "ea53d18296fd439aba0587975b3560e0", "ff91165299b0498a8ccfe0cb0fb798ff", "ef388f86954b4cdb9709729e58e74683", "4b0309b321c24f13a7b108c6430cfadb", "77bdf8fb1f3a4550a32832708d01e2a3", "f78c3194c2c04ccfb7836f76d890ec0b", "6560b88ddcfa4422aef9a3d6181722fe", "dab563db420e40ee9674cda3de4e05f4", "0f345e902401423095adf1a797e37abf", "5aa3399301c34a9db067e64e928d080d", "f1d67c0aabb245f08376df9df3371dc5", "5ce1032e4f614341891c2f19e05599c2", "6c5eab8b0f2f4631ace4c772526ba312", "25927d682fb749738d82510938ea4dd7", "a06e1f69a7ba4173be0b97b4e92e6fe0", "afe123fd98484e2db408f5d7f64ede3e", "ad1b1a4514c84051a003dcddd0f19d36", "7dd8a6d910354907a847d4e2c982862c", "e082145350c9458f8bee08d68085b48a", "22c041b8409041c9b6199185531354e4", "19cb19180da440cf9f08b12b5ebab5ef", "19575eeb509e4815a41b9efcd7a0696b", "5d9aea50a8e2495d8ee1d303768738f3", "d432e1b3cb02425e808a3a3a99bf6809", "b52e8c6a28874986982c7d4cda809766", "61d78ac570f4459b8db16027555c83b3", "0a63f5147ff34de38f6e70a76989471d", "bf1814ad11f24f039f4ac098e790432f", "b1e630232c0a46c2a3ff14b03f1d8d97", "90e53eff07a34ccf8a1f6605ae33f9da", "fae74e6b5acc4a149439054534b63438", "d28d3869095b47289dd3348379ce730c", "0931ad4639fc40dd9ee0955b44fc04b6", "ab179d3c37a34970ba2f28193ef9125d", "0bb46f9442124cb3b802ed44793085c7", "7e64bf95f1f041a7970d419b14a995ad", "8cd1cc16cf744382bd9c2b1ebc10389d", "a1c0441906c34ed898a93abcd09a113f", "851af7893cfd4557a2f87403ee2d4b45", "5e39ee0442694aafa5b1081e1a0c9bb3", "e8c3a8451f374a05b37de0af49e4758d", "38add6305c0b47e0a4acf973664ea075", "399d52acab08475c8882e5ef97c963c5", "e4f6ea85600941beae695d1f3a64901b", "4f7046b2ae5e4a4f9fdb0d37c7faa121", "b25f3dcc4eaf4212a64a03432918c288", "52ef0cac021943659eee11c6602d98c2", "5540f446f1c845739f39ae8ef226dfd8", "dd7d680bc32c4a2ea5f5eb014ed329e6", "52d15f2aa0124a948ebeff07ac49c2c6", "1643c88f33f14f9082e162352de1778c", "c8775e4060994e1cb51551daacb4a959", "738eabcb160046bc8f9c1ef8999b1791", "769290cfdac44ad8ae8a4b5e4c9ded49", "9362d4245a8543ed81e46a1f03bc79b1", "099e4e2a07e24472bbea46ab869f2aab", "b1428ee09bb84364b0f9e12bb1c7820d", "bf56bff723a54d9c853b92b0bfd9b764", "e8fb649d0dd74efe99098910468fa11b", "0e009d6b18a44aa3bd6988412496a881", "0e83b89891d0472cbe60a06cea61e110", "ccd70fef463e4231a1252c669e928f0e", "eabc681b067f4cda8ea92bacada2916c", "66396dda9e404bdcae7f3805d1d50eba", "a201302105b1467784c2c73779d61b64" ] }, "id": "b946427b-b259-4eb2-a40b-ed7b7e476354", "outputId": "2e051817-f883-4873-ce39-83682533642b" }, "outputs": [], "source": [ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "# Define the pipeline for classifying product\n", "product_classifier = pipeline(\"text-classification\", model=\"Mahesh9/distil-bert-fintuned-product-cfpb-complaints\",\n", " max_length = 512, truncation = True, device = device)" ] }, { "cell_type": "markdown", "id": "0f0c40cd-f23e-4e0a-8c03-34b517a4c727", "metadata": { "id": "0f0c40cd-f23e-4e0a-8c03-34b517a4c727", "jp-MarkdownHeadingCollapsed": true }, "source": [ "#### Function to choose the appropriate product model to classify the sub-product" ] }, { "cell_type": "code", "execution_count": 5, "id": "619d9c58-1a83-4279-b452-63f3cb69998f", "metadata": { "id": "619d9c58-1a83-4279-b452-63f3cb69998f" }, "outputs": [], "source": [ "# Define a function to select the appropriate subproduct prediction model based on the predicted product\n", "def select_subproduct_model(predicted_product):\n", " if predicted_product == 'Credit Reporting' :\n", " return trained_model_cr\n", " elif predicted_product == 'Credit/Prepaid Card':\n", " return trained_model_cp\n", " elif predicted_product == 'Checking or savings account':\n", " return trained_model_cs\n", " elif predicted_product == 'Loans / Mortgage':\n", " return trained_model_l\n", " elif predicted_product == 'Debt collection':\n", " return trained_model_d\n", " else:\n", " raise ValueError(\"Invalid predicted product category\")" ] }, { "cell_type": "markdown", "id": "2f361468-ab6d-4d9a-a665-2c9dbce42e93", "metadata": { "id": "2f361468-ab6d-4d9a-a665-2c9dbce42e93" }, "source": [ "#### LLM to classify the issue based on the narrative" ] }, { "cell_type": "code", "execution_count": 6, "id": "0a8da273-8dfb-43b8-abf9-cf06871f2763", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 209, "referenced_widgets": [ "f38674633a614d2db04b5a1b63926fd3", "58721cdb25d14ac69d361fa9e0626d3b", "463da2aaa5854aa681945094635dc62c", "207ea2850e1349ccb9dd983734675593", "fca3cc8d9c0c42a9b370da6863c2205a", "8772c8bcb37a4d24ac178f1cafd81fb7", "ec2cc83997b4425d8f4c9af02cefb9dd", "1e3cf5304434412ebbb572cc17f641d8", "a98cf2410cb647bfa6136fdb63946ebb", "961e726ab3324a2ab77488ad34352e32", "883b89e3b4564583a66ebc0ca94f1e76", "4535cc65da194ed294769fc82bd90bd7", "0a34807b93954d46a4ebf61c209f74f5", "378a83b9489441c7b709e13c89827ba9", "7883bc7f255942d6a77afa8cb22f721e", "9b1351b04859480185bc723443e1ab1d", "e547d7f107034d60a4f4ce6d1266dd44", "1b2e7586cfab43b080e9871a511ec14d", "0b1da0fd31b942cda52be9ca2a7cbeb3", "7b22670fb65a4b24a0052cbd43b7e39d", "b8b2f8edda234c15a4667742898a81f9", "43c6d5a2dd2c4d67961729763b5018cb", "5f61907e18804bdba1a0f48a2b51992a", "00351f920d0d44209ded19347c943574", "bf2961ffb42645d59e1c36d4fce01cff", "6f75b09ecff347309c0c0babf75797d9", "898dd39c2be24f37b7b7337bed361494", "63832e563c3b4c9a90ce93a1a54fc6e0", "0021fe130e90449cbd96c29b115563ba", "1ed9f33276e3490ea9935892eec38022", "157a3896549e41fb86cc2bbda1907611", "80efe9fa49474eb88265e74ac799494c", "d1eec8d4515447cd813710a9e360cfca", "42e4760025154848b006ceedda2ccea3", "9fb602f9cfb84665871e6faabfeb3055", "2e03ec364c924ef69871758e5e08cd11", "0e1f85ab17464a44ae2ce57545e3639f", "562bb1a90ebe41ec86b036971fefd84d", "3adafd4730aa49f1bfb31dcf5a755603", "be5aea5ddb694623821d2204f7f46dc3", "0f3d34788db5403b8791adc406d11a4e", "76294d6c41fc4fecb449fbaba5aa7938", "33b8b8b2d9d3463ead7c1ca35591e313", "3aba5a6aa0514c12ba5677c2b56b1d2d", "880428334b0844cb850196c5a9882408", "a310c427bafb4bcab1c62dfe2851e83b", "b6fcee1aa08542a4866aa2c1fc3c73ff", "a1e1352c4a3f4001a339ede22794ca91", "e2746dfe70114294be4abb3a45fe7628", "2e721ce19bc5433ea04a687c83d77e66", "1227ce5df92b421989b657354c4ccc05", "9bbe610697624d899962feb1ffa683bd", "ea17ff9b5d634171baecd38dafa1ef42", "c167d4351c1e44538a7bb040d00fe018", "a393fecf91944fedb1b1419ce48f3b85", "ae6c8b1e77e6465183d418b254742c79", "0a3ded93837a4d2f9715cb6e2153b5de", "2aeb553c30e64b4b95019a906cc9842d", "5b4e954e1d9f40b789d4db3d0bd909c8", "c4cb4cea0b9c49f6ad1aa33320343fd8", "9420f56e109f4f00a19f397baae56fed", "48464a6b67aa46b2b09d05757a77fbd4", "ab5ca9fcbde94469a16a355fcddccde2", "68e410ec912f40f39c122cf511b23152", "22d6ae0e16d1443987a3319a298159ef", "a71baf8630f548adac12893bc63be69a" ] }, "id": "0a8da273-8dfb-43b8-abf9-cf06871f2763", "outputId": "6551f092-b9a2-4574-b270-94a3cf78a21b" }, "outputs": [], "source": [ "# Define the pipeline for classifying issue\n", "issue_classifier = pipeline(\"text-classification\", model=\"Mahesh9/distil-bert-fintuned-issues-cfpb-complaints\",\n", " max_length = 512, truncation = True, device = device)" ] }, { "cell_type": "markdown", "id": "df05c0c0-c4cc-4287-b129-75f60dd88348", "metadata": { "id": "df05c0c0-c4cc-4287-b129-75f60dd88348" }, "source": [ "#### Function to choose the appropriate issue model to classify the sub-issue" ] }, { "cell_type": "code", "execution_count": 7, "id": "f55a787b-ce6a-49dd-96dd-1cbfda8a68a5", "metadata": { "id": "f55a787b-ce6a-49dd-96dd-1cbfda8a68a5" }, "outputs": [], "source": [ "# Define a function to select the appropriate subissue prediction model based on the predicted issue\n", "def select_subissue_model(predicted_issue):\n", " if predicted_issue == \"Problem with a company's investigation into an existing problem\":\n", " return issue_models['trained_model_investigation_wrt_problem']\n", "\n", " elif predicted_issue == \"Problem with a credit reporting company's investigation into an existing problem\":\n", " return issue_models['trained_model_credit_investigation_wrt_problem']\n", "\n", " elif predicted_issue == \"Problem with a company's investigation into an existing issue\":\n", " return issue_models['trained_model_investigation_wrt_issue']\n", "\n", " elif predicted_issue == \"Problem with a purchase shown on your statement\":\n", " return issue_models['trained_model_purchase_shown']\n", "\n", " elif predicted_issue == \"Incorrect information on your report\":\n", " return issue_models['trained_model_incorrect_info']\n", "\n", " elif predicted_issue == \"Improper use of your report\":\n", " return issue_models['trained_model_improper_use_report']\n", "\n", " elif predicted_issue == \"Account Operations and Unauthorized Transaction Issues\":\n", " return issue_models['trained_model_account_operations']\n", "\n", " elif predicted_issue == \"Payment and Funds Management\":\n", " return issue_models['trained_model_payment_funds']\n", "\n", " elif predicted_issue == \"Managing an account\":\n", " return issue_models['trained_model_managing_account']\n", "\n", " elif predicted_issue == \"Attempts to collect debt not owed\":\n", " return issue_models['trained_model_collect_debt']\n", "\n", " elif predicted_issue == \"Written notification about debt\":\n", " return issue_models['trained_model_notification_about_debt']\n", "\n", " elif predicted_issue == \"Dealing with your lender or servicer\":\n", " return issue_models['trained_model_lender']\n", "\n", " elif predicted_issue == \"Disputes and Misrepresentations\":\n", " return issue_models['trained_model_disputes']\n", "\n", " elif predicted_issue == \"Closing your account\":\n", " return issue_models['trained_model_closing_your_account']\n", "\n", " elif predicted_issue == \"Closing an account\":\n", " return issue_models['trained_model_closing_account']\n", "\n", " elif predicted_issue == \"Credit Report and Monitoring Issues\":\n", " return issue_models['trained_model_credit_report']\n", "\n", " elif predicted_issue == \"Legal and Threat Actions\":\n", " return issue_models['trained_model_legal_and_threat']\n", "\n", " else:\n", " raise ValueError(\"Invalid predicted issue category\")" ] }, { "cell_type": "markdown", "id": "d87974e1-1bf8-44ea-bfee-75de8e2960b4", "metadata": { "id": "d87974e1-1bf8-44ea-bfee-75de8e2960b4" }, "source": [ "#### Driver code to classify the complaint into various categories" ] }, { "cell_type": "code", "execution_count": 8, "id": "dc785511-d68f-4341-a080-23f8f27eefc4", "metadata": { "id": "dc785511-d68f-4341-a080-23f8f27eefc4" }, "outputs": [], "source": [ "def classify_complaint(narrative):\n", " # Predict product category\n", " predicted_product = product_classifier(narrative)[0]['label']\n", "\n", " # Load the appropriate subproduct prediction model\n", " subproduct_model = select_subproduct_model(predicted_product)\n", " # Predict subproduct category using the selected model\n", " predicted_subproduct = subproduct_model.predict([narrative])[0]\n", "\n", "\n", "\n", " # Predict the appropriate issue category using the narrative\n", " predicted_issue = issue_classifier(narrative)[0]['label']\n", "\n", " # Load the appropriate subissue prediction model\n", " subissue_model = select_subissue_model(predicted_issue)\n", " # Predict subissue category using the selected model\n", " predicted_subissue = subissue_model.predict([narrative])[0]\n", "\n", " return {\n", " \"Product\" : predicted_product,\n", " \"Sub-product\" : predicted_subproduct,\n", " \"Issue\" : predicted_issue,\n", " \"Sub-issue\" : predicted_subissue\n", " }" ] }, { "cell_type": "code", "execution_count": 9, "id": "982521ea-364e-4521-889e-fe586c186701", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "982521ea-364e-4521-889e-fe586c186701", "outputId": "57634353-228e-4333-cb04-4d0fcdbe55f1" }, "outputs": [ { "data": { "text/plain": [ "{'Product': 'Credit/Prepaid Card',\n", " 'Sub-product': 'General-purpose credit card or charge card',\n", " 'Issue': \"Problem with a company's investigation into an existing problem\",\n", " 'Sub-issue': 'Was not notified of investigation status or results'}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "narrative = \"\"\"It is absurd that I have consistently made timely payments for this account and have never been\n", " overdue. I kindly request that you promptly update my account to reflect this accurately.\"\"\"\n", "\n", "classify_complaint(narrative)" ] }, { "cell_type": "code", "execution_count": 10, "id": "a80b68d8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Complaint NarrativeActual ProductPredicted ProductActual Sub-productPredicted Sub-productActual IssuePredicted IssueActual Sub-issuePredicted Sub-issue
02nd and final notice Notice to agent is notice...Credit ReportingCredit ReportingCredit reportingCredit reportingProblem with a company's investigation into an...Improper use of your reportWas not notified of investigation status or re...Reporting company used your report improperly
1It has come to my attention that this bankrupt...Credit ReportingCredit ReportingCredit reportingCredit reportingIncorrect information on your reportIncorrect information on your reportInformation belongs to someone elseInformation belongs to someone else
2per 15 1666B no lates should be furnished per ...Credit ReportingCredit ReportingCredit reportingCredit reportingIncorrect information on your reportImproper use of your reportAccount status incorrectReporting company used your report improperly
3XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX ...Credit ReportingCredit ReportingCredit reportingCredit reportingProblem with a credit reporting company's inve...Improper use of your reportInvestigation took more than 30 daysReporting company used your report improperly
4On XX/XX/2022 I sent a letter disputing a bill...Credit/Prepaid CardCredit ReportingGeneral-purpose credit card or charge cardCredit reportingProblem with a purchase shown on your statementAttempts to collect debt not owedCredit card company isn't resolving a dispute ...Debt is not yours
\n", "
" ], "text/plain": [ " Complaint Narrative Actual Product \\\n", "0 2nd and final notice Notice to agent is notice... Credit Reporting \n", "1 It has come to my attention that this bankrupt... Credit Reporting \n", "2 per 15 1666B no lates should be furnished per ... Credit Reporting \n", "3 XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX, XXXX ... Credit Reporting \n", "4 On XX/XX/2022 I sent a letter disputing a bill... Credit/Prepaid Card \n", "\n", " Predicted Product Actual Sub-product \\\n", "0 Credit Reporting Credit reporting \n", "1 Credit Reporting Credit reporting \n", "2 Credit Reporting Credit reporting \n", "3 Credit Reporting Credit reporting \n", "4 Credit Reporting General-purpose credit card or charge card \n", "\n", " Predicted Sub-product Actual Issue \\\n", "0 Credit reporting Problem with a company's investigation into an... \n", "1 Credit reporting Incorrect information on your report \n", "2 Credit reporting Incorrect information on your report \n", "3 Credit reporting Problem with a credit reporting company's inve... \n", "4 Credit reporting Problem with a purchase shown on your statement \n", "\n", " Predicted Issue \\\n", "0 Improper use of your report \n", "1 Incorrect information on your report \n", "2 Improper use of your report \n", "3 Improper use of your report \n", "4 Attempts to collect debt not owed \n", "\n", " Actual Sub-issue \\\n", "0 Was not notified of investigation status or re... \n", "1 Information belongs to someone else \n", "2 Account status incorrect \n", "3 Investigation took more than 30 days \n", "4 Credit card company isn't resolving a dispute ... \n", "\n", " Predicted Sub-issue \n", "0 Reporting company used your report improperly \n", "1 Information belongs to someone else \n", "2 Reporting company used your report improperly \n", "3 Reporting company used your report improperly \n", "4 Debt is not yours " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_df = pd.read_csv('../data_splits/test_data_results.csv')\n", "test_df.head()" ] }, { "cell_type": "code", "execution_count": 29, "id": "dd18d7f1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Complaint Narrative': 'I have been on SAVE repayment plab for over XXXX month, which I was told would have interest waived, unfortunately this has not happened, instead have been accruing nearly XXXXXXXX XXXX interest per month Please help',\n", " 'Actual Product': 'Loans / Mortgage',\n", " 'Predicted Product': 'Loans / Mortgage',\n", " 'Actual Sub-product': 'Federal student loan servicing',\n", " 'Predicted Sub-product': 'Federal student loan servicing',\n", " 'Actual Issue': 'Dealing with your lender or servicer',\n", " 'Predicted Issue': 'Disputes and Misrepresentations',\n", " 'Actual Sub-issue': 'Trouble with how payments are being handled',\n", " 'Predicted Sub-issue': 'Problem with fees'}" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "a = random.randint(0, 100)\n", "test_df[(test_df['Actual Product'] == 'Loans / Mortgage') & \n", " (test_df['Predicted Product'] == 'Loans / Mortgage') &\n", " (test_df['Actual Issue'] != test_df['Predicted Issue']) \n", " ].iloc[a,:].to_dict()" ] }, { "cell_type": "code", "execution_count": 28, "id": "ed261949", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Complaint Narrative': 'I have contacted Alpine Credit XXXX XXXX Multiple times to verify the debt or send debt validation with no response first on XX/XX/XXXX with no response after 30 days, I sent another letter in XX/XX/XXXX stating it has been over 30 days since my letter and I have not received a response, they marked the account as disputed which means they have received my letters but have yet to contact me. I contacted them a third time XX/XX/XXXX and still no response.',\n", " 'Actual Product': 'Debt collection',\n", " 'Predicted Product': 'Debt collection',\n", " 'Actual Sub-product': 'Medical debt',\n", " 'Predicted Sub-product': 'Credit card debt',\n", " 'Actual Issue': 'Written notification about debt',\n", " 'Predicted Issue': 'Written notification about debt',\n", " 'Actual Sub-issue': \"Didn't receive enough information to verify debt\",\n", " 'Predicted Sub-issue': \"Didn't receive enough information to verify debt\"}" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "test_df[(test_df['Actual Product'] == 'Debt collection') & \n", " (test_df['Predicted Product'] == 'Debt collection') &\n", " (test_df['Actual Issue'] == test_df['Predicted Issue']) &\n", " (test_df['Actual Sub-issue'] == test_df['Predicted Sub-issue']) \n", " ].iloc[random.randint(0, 100),:].to_dict()" ] }, { "cell_type": "code", "execution_count": null, "id": "2f40eda7", "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "I have called MOHELA many times trying to get an update on my payments for public loan forgiveness.\n", "When my loan was transferred to them over a year ago, I had 98 payments.\n", "I have payments that under the temporary waiver count towards loan forgiveness and have not been updated or applied.\n", "My payments have almost a year that have not been updated on their systems.\n", "A few months is understandable, but over a year is unacceptable.\n", "I have enough payments if they count the payments that have not been counted to get my loans forgiven under the public service program.\n", "I have been working as a XXXX for 13 years.\n", "The lack of accountability on their part has cause me a lot of stress and is affecting my mental health.\n", "These payments should have been counted a long time ago. Calling them on the phone is a nightmare.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 17, "id": "7efb17e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Product': 'Credit Reporting',\n", " 'Sub-product': 'Credit reporting',\n", " 'Issue': 'Improper use of your report',\n", " 'Sub-issue': 'Reporting company used your report improperly'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "narrative_1 = \"\"\"in accordance with the Fair Credit Reporting Act this creditor has violated my Rights! Under Act 15 U.S.C. 1681 section 602 says \" I have the right to Privacy \\'\\'. 15 U.S.C 1681 section 604 A section 2 \\'It also states a consumer reporting agency can not refurnish an account without my written instructions \\'\\'. Under 15 U.S.C 1666B \" A creditor may not treat a payment on a credit card account under an open and consumer credit plan as late for any purpose \\'\\'.\"\"\"\n", "classify_complaint(narrative_1)" ] }, { "cell_type": "code", "execution_count": 26, "id": "a6df5fd5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Product': 'Loans / Mortgage',\n", " 'Sub-product': 'Federal student loan servicing',\n", " 'Issue': 'Dealing with your lender or servicer',\n", " 'Sub-issue': 'Received bad information about your loan'}" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "narrative_2 = \"\"\" Despite numerous calls, MOHELA hasn't updated my account with payments eligible for public loan forgiveness.\n", " This lack of action over a year is causing significant stress.\"\"\"\n", "\n", "classify_complaint(narrative_2)" ] }, { "cell_type": "code", "execution_count": null, "id": "3bd1cbd4", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "6a68ebbc-de80-4176-ac38-bfe5fd84b86c", "metadata": { "id": "6a68ebbc-de80-4176-ac38-bfe5fd84b86c", "jp-MarkdownHeadingCollapsed": true }, "source": [ "### Evaluating on the test set" ] }, { "cell_type": "code", "execution_count": 17, "id": "w76l5Y606Xyc", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 275, "referenced_widgets": [ "215ca9707ca345daaa2f258f0ac448b4", "9b9e28f7d25c47ffb27964235abdbab6", "2c847326537e4c6180fb06f5e2149edd", "c33233f6538149d9aa4e5dc12606e8d8", "a4e9b673f63b47f4ba3cf0b785149f92", "be8d38ead5344c0fa0e3e8f356d1ee21", "1247d5f2624e4bdb9260b86c3040558a", "262e189bdf254433822527730e0858fe", "5cc60b7f47274fb3a21ffd1d58269729", "a5dd0ebfc34645e19a8e439012353d92", "ae77e7d380374288b5ba80aa7afe4c53", "9d1ead93dcca4b1493bb558cb1e95045", "671c9eaf696e49529817248842570983", "c3adeb56cb2d4ca79f4227c334cfb6f2", "fe3589a0e5f947f087977719bff83f34", "706310ab6a4d412cbaf978e5eef0d209", "577f070a1f864bacbc0fc855e1729f74", "1fe072e24afa4496b9fd000261460933", "167fa10ab49c46968c34bb5d14ee2931", "a723803381aa412b8cb5b52f2310fe8e", "8b848a78513346bbb41f77b81b86d227", "a6e06a1b728d4e5e88b27f6859b29459" ] }, "id": "w76l5Y606Xyc", "outputId": "f3830c46-4340-4049-8072-f0f138d385a0" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "215ca9707ca345daaa2f258f0ac448b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0 examples [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9d1ead93dcca4b1493bb558cb1e95045", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/61880 [00:00