Spaces:

Mahesh9
/

CFPB-Complaint-Classifier

Sleeping

File size: 34,061 Bytes

26c2106

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a751d479-1500-41e2-8c01-252e849dad05",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8158cb66-9f9a-4bb2-bc6e-6a51146be10c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt \n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import classification_report,accuracy_score\n",
    "import numpy as np\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report, accuracy_score\n",
    "from sklearn.utils.class_weight import compute_class_weight\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "70ea935b-3b62-4cf9-8bef-06bf30904b20",
   "metadata": {},
   "source": [
    "## Sub Issues"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f9ddaa89-dc8d-40f5-8098-7d108ab9d578",
   "metadata": {},
   "source": [
    "### Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c1f9fd85-f47e-4962-a693-7cb9efca763a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "from sklearn.utils.class_weight import compute_class_weight\n",
    "\n",
    "def train_model(training_df, validation_df, target_column, classifier_model, subissues_to_drop=None, random_state=42):\n",
    "    # Drop specified subproducts from training and validation dataframes\n",
    "    if subissues_to_drop:\n",
    "        training_df = training_df[~training_df[target_column].isin(subissues_to_drop)]\n",
    "        validation_df = validation_df[~validation_df[target_column].isin(subissues_to_drop)]\n",
    "    \n",
    "    # Compute class weights\n",
    "    class_weights = compute_class_weight('balanced', classes=np.unique(training_df[target_column]), y=training_df[target_column])\n",
    "    \n",
    "    # Convert class weights to dictionary format\n",
    "    class_weight = {label: weight for label, weight in zip(np.unique(training_df[target_column]), class_weights)}\n",
    "    \n",
    "    # Define a default class weight for missing classes\n",
    "    default_class_weight = 0.5\n",
    "    \n",
    "    # Assign default class weight for missing classes\n",
    "    for label in np.unique(training_df[target_column]):\n",
    "        if label not in class_weight:\n",
    "            class_weight[label] = default_class_weight\n",
    "    \n",
    "    # Define the pipeline\n",
    "    pipeline = Pipeline([\n",
    "        ('tfidf', TfidfVectorizer()),\n",
    "        ('classifier', classifier_model)\n",
    "    ])\n",
    "    \n",
    "    # Train the pipeline\n",
    "    pipeline.fit(training_df['Consumer complaint narrative'], training_df[target_column])\n",
    "    \n",
    "    # Make predictions on the validation set\n",
    "    y_pred = pipeline.predict(validation_df['Consumer complaint narrative'])\n",
    "    \n",
    "    # Evaluate the pipeline\n",
    "    accuracy = accuracy_score(validation_df[target_column], y_pred)\n",
    "    print(\"\\nClassification Report:\")\n",
    "    print(classification_report(validation_df[target_column], y_pred))\n",
    "    print(\"Accuracy:\", accuracy)\n",
    "    \n",
    "    return pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7a0d277-75c1-4435-86e5-d0ee7d3dabf3",
   "metadata": {},
   "source": [
    "#### Reading the Issue DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "c1ea3fbc-4062-483b-a5c6-65d644983ce5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "def read_subissue_data(issue_name, data_dir='../data_preprocessing_scripts/issue_data_splits'):\n",
    "    # Convert issue name to lower case and replace '/' and spaces with underscores\n",
    "    issue_name = issue_name.replace('/', '_').replace(' ', '_').lower()\n",
    "    \n",
    "    # Construct file paths\n",
    "    train_file = os.path.join(data_dir, f\"{issue_name}_train_data.csv\")\n",
    "    val_file = os.path.join(data_dir, f\"{issue_name}_val_data.csv\")\n",
    "    \n",
    "    # Read the CSV files\n",
    "    train_df = pd.read_csv(train_file)\n",
    "    val_df = pd.read_csv(val_file )\n",
    "    \n",
    "    return train_df, val_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "ae74f945-3fe9-4207-8fe0-fb4d8c5d2a27",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"../data_splits/train-data-split.csv\")\n",
    "issue_categories = list(df_train['Issue'].unique())\n",
    "\n",
    "def classify_sub_issue(issue):\n",
    "    issue_name = issue.replace('/', '_').replace(' ', '_').lower()\n",
    "    train_df,val_df= read_subissue_data(issue)\n",
    "    rf_classifier = RandomForestClassifier(n_estimators=200, random_state=42)\n",
    "    trained_model = train_model(train_df, val_df, 'Sub-issue', rf_classifier, random_state=42)\n",
    "\n",
    "    # Saving the model\n",
    "    with open(f\"issue_models/{issue_name}.pkl\", 'wb') as f:\n",
    "        pickle.dump(trained_model, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0540f68f-4e14-40c2-ba9e-1875138678a1",
   "metadata": {},
   "source": [
    "### Sub-issues classification"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a53f046-c7f8-48de-a8f3-9a66ffad5f55",
   "metadata": {},
   "source": [
    "#### 1. Problem with a company's investigation into an existing problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "a33a3974-b3e9-466c-85a9-8d9b0255bbba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Problem with a company's investigation into an existing problem\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                                                       precision    recall  f1-score   support\n",
      "\n",
      "Difficulty submitting a dispute or getting information about a dispute over the phone       0.88      0.37      0.52        41\n",
      "                                                 Investigation took more than 30 days       0.95      0.73      0.83       162\n",
      "                                           Problem with personal statement of dispute       0.90      0.53      0.67        53\n",
      "                              Their investigation did not fix an error on your report       0.91      1.00      0.95      1122\n",
      "                                  Was not notified of investigation status or results       0.98      0.87      0.92       209\n",
      "\n",
      "                                                                             accuracy                           0.92      1587\n",
      "                                                                            macro avg       0.93      0.70      0.78      1587\n",
      "                                                                         weighted avg       0.92      0.92      0.91      1587\n",
      "\n",
      "Accuracy: 0.9199747952110902\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[0]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ffa280b-614f-48b2-9870-70fb053b45b6",
   "metadata": {},
   "source": [
    "#### 2. Incorrect information on your report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "3d431635-227e-4873-b017-8cb4180a6e2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Incorrect information on your report\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                     precision    recall  f1-score   support\n",
      "\n",
      "                      Account information incorrect       0.74      0.68      0.71       699\n",
      "                           Account status incorrect       0.87      0.73      0.79       771\n",
      "                Information belongs to someone else       0.90      0.99      0.94      4337\n",
      "Information is missing that should be on the report       0.95      0.31      0.47        65\n",
      "       Old information reappears or never goes away       0.93      0.40      0.56       126\n",
      "                     Personal information incorrect       0.95      0.78      0.86       440\n",
      "               Public record information inaccurate       0.98      0.47      0.64       102\n",
      "\n",
      "                                           accuracy                           0.88      6540\n",
      "                                          macro avg       0.90      0.62      0.71      6540\n",
      "                                       weighted avg       0.88      0.88      0.88      6540\n",
      "\n",
      "Accuracy: 0.8831804281345565\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[1]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5cb1853-9bc1-4541-9dac-5cb208abcfc5",
   "metadata": {},
   "source": [
    "#### 3. Problem with a credit reporting company's investigation into an existing problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "86f04fd6-7625-4aba-9094-f7025078d1fc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Problem with a credit reporting company's investigation into an existing problem\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                                                       precision    recall  f1-score   support\n",
      "\n",
      "Difficulty submitting a dispute or getting information about a dispute over the phone       0.83      0.36      0.50        83\n",
      "                                                 Investigation took more than 30 days       0.97      0.84      0.90       505\n",
      "                                           Problem with personal statement of dispute       1.00      0.38      0.55        47\n",
      "                              Their investigation did not fix an error on your report       0.92      0.99      0.95      2277\n",
      "                                  Was not notified of investigation status or results       0.96      0.88      0.92       473\n",
      "\n",
      "                                                                             accuracy                           0.93      3385\n",
      "                                                                            macro avg       0.94      0.69      0.77      3385\n",
      "                                                                         weighted avg       0.93      0.93      0.92      3385\n",
      "\n",
      "Accuracy: 0.9288035450516987\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[2]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f00b115b-46c4-4d46-adae-a10a5e92a839",
   "metadata": {},
   "source": [
    "#### 4. Problem with a purchase shown on your statement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "e6577c57-6caa-4221-a68b-e0b65e739511",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Problem with a purchase shown on your statement\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                                                  precision    recall  f1-score   support\n",
      "\n",
      "               Card was charged for something you did not purchase with the card       0.81      0.19      0.30        70\n",
      "Credit card company isn't resolving a dispute about a purchase on your statement       0.75      0.98      0.85       172\n",
      "\n",
      "                                                                        accuracy                           0.75       242\n",
      "                                                                       macro avg       0.78      0.58      0.58       242\n",
      "                                                                    weighted avg       0.77      0.75      0.69       242\n",
      "\n",
      "Accuracy: 0.7520661157024794\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[3]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a8648f75-e62d-4b80-b4ed-ccf104137c74",
   "metadata": {},
   "source": [
    "#### 5. Improper use of your report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "ea64cabb-1372-4a52-826f-8b1bf8f2cb32",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Improper use of your report\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                          precision    recall  f1-score   support\n",
      "\n",
      "Credit inquiries on your report that you don't recognize       0.93      0.84      0.88       990\n",
      "           Reporting company used your report improperly       0.96      0.98      0.97      3654\n",
      "\n",
      "                                                accuracy                           0.95      4644\n",
      "                                               macro avg       0.95      0.91      0.93      4644\n",
      "                                            weighted avg       0.95      0.95      0.95      4644\n",
      "\n",
      "Accuracy: 0.9528423772609819\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[4]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f48f3308-d884-440c-8a24-8a81e7140ee0",
   "metadata": {},
   "source": [
    "#### 6. Account Operations and Unauthorized Transaction Issues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "08ec2d0e-950e-4f6d-9cdb-8328fed17384",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Account Operations and Unauthorized Transaction Issues\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                  precision    recall  f1-score   support\n",
      "\n",
      "             Account opened as a result of fraud       0.83      0.67      0.74        43\n",
      "Card opened as result of identity theft or fraud       0.88      0.77      0.82        39\n",
      "                  Transaction was not authorized       0.86      0.97      0.91       102\n",
      "\n",
      "                                        accuracy                           0.86       184\n",
      "                                       macro avg       0.86      0.80      0.83       184\n",
      "                                    weighted avg       0.86      0.86      0.85       184\n",
      "\n",
      "Accuracy: 0.8586956521739131\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[5]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c7332c0-3cc9-42b6-9bbd-5b33719e676d",
   "metadata": {},
   "source": [
    "#### 7. Payment and Funds Management"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "bf0e0437-a85d-4dcd-8b93-982fbd33cee6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Payment and Funds Management\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                precision    recall  f1-score   support\n",
      "\n",
      "               Billing problem       1.00      0.65      0.79        34\n",
      " Overdrafts and overdraft fees       0.89      0.92      0.91        74\n",
      "Problem during payment process       0.81      0.94      0.87        65\n",
      "\n",
      "                      accuracy                           0.87       173\n",
      "                     macro avg       0.90      0.83      0.85       173\n",
      "                  weighted avg       0.88      0.87      0.87       173\n",
      "\n",
      "Accuracy: 0.8728323699421965\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[6]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b034a174-16e7-41b6-970c-ef23d9b9da29",
   "metadata": {},
   "source": [
    "#### 8. Managing an account"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "bc62e5f5-14ef-4d8a-8434-79b4e7da5a9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Managing an account\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                              precision    recall  f1-score   support\n",
      "\n",
      "                              Banking errors       0.50      0.10      0.16        73\n",
      "                    Deposits and withdrawals       0.46      0.90      0.61       201\n",
      "                                 Fee problem       0.55      0.57      0.56        56\n",
      "Funds not handled or disbursed as instructed       0.00      0.00      0.00        72\n",
      "                   Problem accessing account       0.00      0.00      0.00        40\n",
      "           Problem using a debit or ATM card       0.71      0.58      0.64       113\n",
      "\n",
      "                                    accuracy                           0.52       555\n",
      "                                   macro avg       0.37      0.36      0.33       555\n",
      "                                weighted avg       0.43      0.52      0.43       555\n",
      "\n",
      "Accuracy: 0.5153153153153153\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[7]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c2e3454-eaa2-4a71-a058-988ad7716eac",
   "metadata": {},
   "source": [
    "#### 9. Attempts to collect debt not owed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "85ad1ffc-97e5-436b-afea-abed93b67b75",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Attempts to collect debt not owed\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                   precision    recall  f1-score   support\n",
      "\n",
      "                Debt is not yours       0.64      0.93      0.76       207\n",
      "                    Debt was paid       0.96      0.31      0.46        72\n",
      "Debt was result of identity theft       0.84      0.56      0.67       129\n",
      "\n",
      "                         accuracy                           0.70       408\n",
      "                        macro avg       0.81      0.60      0.63       408\n",
      "                     weighted avg       0.76      0.70      0.68       408\n",
      "\n",
      "Accuracy: 0.7009803921568627\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[8]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "43b186f0-b626-43c2-9823-6818da478d48",
   "metadata": {},
   "source": [
    "-----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d87e677-da08-4682-9823-72c8315e52a2",
   "metadata": {},
   "source": [
    "#### 10. Written notification about debt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "214fc01d-7bf1-4b5a-b409-10b3c99076ae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Written notification about debt\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                  precision    recall  f1-score   support\n",
      "\n",
      "Didn't receive enough information to verify debt       0.77      0.99      0.87       135\n",
      "       Didn't receive notice of right to dispute       0.90      0.19      0.31        48\n",
      "\n",
      "                                        accuracy                           0.78       183\n",
      "                                       macro avg       0.84      0.59      0.59       183\n",
      "                                    weighted avg       0.81      0.78      0.72       183\n",
      "\n",
      "Accuracy: 0.7814207650273224\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[9]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7cca2ba7-f0e1-4e56-a6f0-2a3c92bcac56",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "401e87db-4759-437c-bcb1-382a7f8ed226",
   "metadata": {},
   "source": [
    "#### 11. Dealing with your lender or servicer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "9c1485fc-1b14-44c9-b4c9-d92bea864800",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Dealing with your lender or servicer\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                             precision    recall  f1-score   support\n",
      "\n",
      "   Received bad information about your loan       0.74      0.70      0.72        50\n",
      "Trouble with how payments are being handled       0.71      0.75      0.73        48\n",
      "\n",
      "                                   accuracy                           0.72        98\n",
      "                                  macro avg       0.73      0.72      0.72        98\n",
      "                               weighted avg       0.73      0.72      0.72        98\n",
      "\n",
      "Accuracy: 0.7244897959183674\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[10]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ca1aab7-158f-48bf-871c-1fa991fb1f9e",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36ce1724-61e5-4d5b-bbaf-a79293af6506",
   "metadata": {},
   "source": [
    "#### 12. Disputes and Misrepresentations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "380ee173-6c72-40b8-9eb2-a5af680c8ff7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Disputes and Misrepresentations\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                   precision    recall  f1-score   support\n",
      "\n",
      "Attempted to collect wrong amount       0.85      0.92      0.88        66\n",
      "                    Other problem       0.85      0.65      0.74        54\n",
      "                Problem with fees       0.83      0.93      0.88        57\n",
      "\n",
      "                         accuracy                           0.84       177\n",
      "                        macro avg       0.84      0.83      0.83       177\n",
      "                     weighted avg       0.84      0.84      0.84       177\n",
      "\n",
      "Accuracy: 0.8418079096045198\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[11]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e44501a4-2021-4d78-b3c2-c937d286cb22",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "451ccf3a-c97e-46e3-9c47-c225d6e3dd49",
   "metadata": {},
   "source": [
    "#### 13. Problem with a company's investigation into an existing issue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "20201d0c-b9da-4e2e-957b-23649f06e48e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Problem with a company's investigation into an existing issue\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                                                       precision    recall  f1-score   support\n",
      "\n",
      "Difficulty submitting a dispute or getting information about a dispute over the phone       0.00      0.00      0.00         3\n",
      "                                                 Investigation took more than 30 days       1.00      1.00      1.00         3\n",
      "                                           Problem with personal statement of dispute       0.00      0.00      0.00         2\n",
      "                              Their investigation did not fix an error on your report       0.50      1.00      0.67         7\n",
      "                                  Was not notified of investigation status or results       0.00      0.00      0.00         2\n",
      "\n",
      "                                                                             accuracy                           0.59        17\n",
      "                                                                            macro avg       0.30      0.40      0.33        17\n",
      "                                                                         weighted avg       0.38      0.59      0.45        17\n",
      "\n",
      "Accuracy: 0.5882352941176471\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[12]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5d37ff8-2382-4c3b-aef0-5affd4d3083b",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9876639-9e72-49ab-9dd4-3ef5ac38a8d8",
   "metadata": {},
   "source": [
    "#### 14. Closing your account"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "95eff365-09f8-4640-9f65-4a82fc321fa9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Closing your account\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                             precision    recall  f1-score   support\n",
      "\n",
      "   Can't close your account       1.00      0.24      0.38        17\n",
      "Company closed your account       0.78      1.00      0.88        46\n",
      "\n",
      "                   accuracy                           0.79        63\n",
      "                  macro avg       0.89      0.62      0.63        63\n",
      "               weighted avg       0.84      0.79      0.74        63\n",
      "\n",
      "Accuracy: 0.7936507936507936\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[13]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c66b9044-32af-4aee-af08-b685480d9f53",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "455f8d69-5531-42e0-a53c-66427ff68fcc",
   "metadata": {},
   "source": [
    "#### 15. Credit Report and Monitoring Issues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "a039cb86-3503-4757-a8ee-7e518eafb9a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Credit Report and Monitoring Issues\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                                          precision    recall  f1-score   support\n",
      "\n",
      "                       Other problem getting your report or credit score       0.89      0.99      0.94        82\n",
      "Problem canceling credit monitoring or identify theft protection service       0.97      0.75      0.85        40\n",
      "\n",
      "                                                                accuracy                           0.91       122\n",
      "                                                               macro avg       0.93      0.87      0.89       122\n",
      "                                                            weighted avg       0.92      0.91      0.91       122\n",
      "\n",
      "Accuracy: 0.9098360655737705\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[14]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee0dfc45-96b2-4cbb-b34d-a8e1441c0c82",
   "metadata": {},
   "source": [
    "----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0dcf3701-d59f-43fa-9aa0-2c65c27a8fe0",
   "metadata": {},
   "source": [
    "#### 16. Closing an account"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "1ed7956b-3d41-46f8-a7e8-ad9f36e1694d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Closing an account\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                        precision    recall  f1-score   support\n",
      "\n",
      "              Can't close your account       1.00      0.04      0.07        27\n",
      "           Company closed your account       0.57      0.83      0.67        69\n",
      "Funds not received from closed account       0.56      0.50      0.53        50\n",
      "\n",
      "                              accuracy                           0.57       146\n",
      "                             macro avg       0.71      0.45      0.42       146\n",
      "                          weighted avg       0.64      0.57      0.51       146\n",
      "\n",
      "Accuracy: 0.5684931506849316\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[15]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3822541c-f13c-4a96-862f-4c23cf2d3895",
   "metadata": {},
   "source": [
    "#### 17. Legal and Threat Actions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "8fa5fc40-6d4f-4321-8eb0-9608dc5b84e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Issue : Legal and Threat Actions\n",
      "\n",
      "\n",
      "Classification Report:\n",
      "                                                      precision    recall  f1-score   support\n",
      "\n",
      "Threatened or suggested your credit would be damaged       1.00      1.00      1.00        48\n",
      "\n",
      "                                            accuracy                           1.00        48\n",
      "                                           macro avg       1.00      1.00      1.00        48\n",
      "                                        weighted avg       1.00      1.00      1.00        48\n",
      "\n",
      "Accuracy: 1.0\n"
     ]
    }
   ],
   "source": [
    "issue_name = issue_categories[16]\n",
    "print(f\"Issue : {issue_name}\\n\")\n",
    "\n",
    "classify_sub_issue(issue_name)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}