diff --git a/data_preprocessing_scripts/.DS_Store b/data_preprocessing_scripts/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5db4257735f2a1f8236853a5d48980825a2ab3c3 Binary files /dev/null and b/data_preprocessing_scripts/.DS_Store differ diff --git a/data_preprocessing_scripts/Data Preprocessing Script.ipynb b/data_preprocessing_scripts/Data Preprocessing Script.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..941255435ff5c2bc68f4cce54ee7f86427638b05 --- /dev/null +++ b/data_preprocessing_scripts/Data Preprocessing Script.ipynb @@ -0,0 +1,576 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5619ac0c-7398-4eb5-bdc0-8d338bf4a41f", + "metadata": {}, + "source": [ + "### Data Preprocessing" + ] + }, + { + "cell_type": "markdown", + "id": "8774cfd1-91b0-4d2d-b0f1-f057a5940cea", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b0b6e81d-c547-41ae-8a2b-4f8864cbc8d4", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "27dd12eb-2975-4b6f-9010-845ae2d23c8f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "c49948e7-3018-4cf8-b3bc-0bae7e6a051f", + "metadata": {}, + "source": [ + "### Data Preprocessing Function" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "02ee5dbe-dc71-4839-a19b-34699133f2be", + "metadata": {}, + "outputs": [], + "source": [ + "def load_and_clean_data(file_path):\n", + " \"\"\"\n", + " Load and clean the data from the specified CSV file.\n", + "\n", + " Parameters:\n", + " - file_path (str): Path to the CSV file containing the data.\n", + "\n", + " Returns:\n", + " - DataFrame: Cleaned DataFrame containing selected columns with NaN values dropped.\n", + " \"\"\"\n", + " df = pd.read_csv(file_path)\n", + " df['Date received'] = pd.to_datetime(df['Date received'])\n", + " \n", + " cols_to_consider = ['Product', 'Sub-product', 'Issue', 'Sub-issue', 'Consumer complaint narrative',\n", + " 'Company public response', 'Company', 'State', 'ZIP code', 'Date received']\n", + " \n", + " df_new = df[cols_to_consider]\n", + " \n", + " df_new = df_new.dropna()\n", + " \n", + " return df_new" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "413135b7-1fb8-4cef-876e-99bfd1f148ac", + "metadata": {}, + "outputs": [], + "source": [ + "def filter_by_years(df, years):\n", + " \"\"\"\n", + " Filter the DataFrame to include only the rows corresponding to specified years.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The DataFrame containing data to filter.\n", + " - years (list of int): List of years to filter by.\n", + "\n", + " Returns:\n", + " - DataFrame: Filtered DataFrame containing rows corresponding to specified years.\n", + " \"\"\"\n", + " filtered_df = df[df['Date received'].dt.year.isin(years)].reset_index(drop=True)\n", + " return filtered_df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9e3f199f-0ad9-40a3-82f1-065af9efa9f5", + "metadata": {}, + "outputs": [], + "source": [ + "def map_product_column(df):\n", + " \"\"\"\n", + " Map values in the 'Product' column of the DataFrame to a standardized set of categories.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The DataFrame containing the 'Product' column to map.\n", + "\n", + " Returns:\n", + " - DataFrame: DataFrame with the 'Product' column values mapped to standardized categories.\n", + " \"\"\"\n", + "\n", + " product_map = {'Credit reporting or other personal consumer reports': 'Credit Reporting',\n", + " 'Credit reporting, credit repair services, or other personal consumer reports': 'Credit Reporting',\n", + " 'Payday loan, title loan, personal loan, or advance loan': 'Loans / Mortgage',\n", + " 'Payday loan, title loan, or personal loan': 'Loans / Mortgage',\n", + " 'Student loan': 'Loans / Mortgage',\n", + " 'Vehicle loan or lease': 'Loans / Mortgage',\n", + " 'Debt collection': 'Debt collection',\n", + " 'Credit card or prepaid card': 'Credit/Prepaid Card',\n", + " 'Credit card': 'Credit/Prepaid Card',\n", + " 'Prepaid card': 'Credit/Prepaid Card',\n", + " 'Mortgage': 'Loans / Mortgage',\n", + " 'Checking or savings account': 'Checking or savings account' \n", + " }\n", + " # Map 'Product' column\n", + " df.loc[:,'Product'] = df['Product'].map(product_map)\n", + " \n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "03a2e0a2-75ff-4b33-b081-58e3d5c791c9", + "metadata": {}, + "outputs": [], + "source": [ + "def clean_narrative(df):\n", + "\n", + " \"\"\"\n", + " Clean the consumer complaint narratives in the DataFrame by excluding short and irrelevant complaints.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The input DataFrame containing consumer complaint data.\n", + "\n", + " Returns:\n", + " - DataFrame: DataFrame with cleaned consumer complaint narratives.\n", + " \"\"\"\n", + "# Compute complaint length\n", + " df['complaint length'] = df['Consumer complaint narrative'].apply(lambda x : len(x))\n", + "\n", + " df = df[df['complaint length'] > 20]\n", + " \n", + " complaints_to_exclude = ['See document attached', 'See the attached documents.', 'Incorrect information on my credit report', 'incorrect information on my credit report',\n", + " 'please see attached file','Please see documents Attached','Incorrect information on my credit report.', 'Please see attached file', 'see attached',\n", + " 'See attached', 'SEE ATTACHED DOCUMENTS', 'See Attached', 'SEE ATTACHMENT', 'SEE ATTACHMENTS', \n", + " 'XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX XXXX']\n", + " \n", + " df = df[~df['Consumer complaint narrative'].isin(complaints_to_exclude)]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0cb28135-12d8-41fc-94a8-2968e558473b", + "metadata": {}, + "outputs": [], + "source": [ + "def filter_by_frequency(df):\n", + " \"\"\"\n", + " Filter the DataFrame based on the frequency of sub-issues and sub-products.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The input DataFrame containing consumer complaint data.\n", + "\n", + " Returns:\n", + " - DataFrame: DataFrame filtered based on the frequency of sub-issues and sub-products.\n", + " \"\"\"\n", + " # Select sub-issues with frequency > 500\n", + " sub_issues_to_consider = df['Sub-issue'].value_counts()[df['Sub-issue'].value_counts() > 500].index\n", + "\n", + " # Filter DataFrame based on selected sub-issues\n", + " reduced_subissues = df[df['Sub-issue'].isin(sub_issues_to_consider)]\n", + " # Select sub-products with frequency > 100\n", + " sub_products_to_consider = reduced_subissues['Sub-product'].value_counts()[reduced_subissues['Sub-product'].value_counts() > 100].index\n", + "\n", + " # Filter DataFrame based on selected sub-products\n", + " final_df = reduced_subissues[reduced_subissues['Sub-product'].isin(sub_products_to_consider)]\n", + "\n", + " return final_df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e102c902-645e-453e-9a67-07781ba6fc55", + "metadata": {}, + "outputs": [], + "source": [ + "def map_issue(df):\n", + " \"\"\"\n", + " Map the issues to more defined and lesser number of issues in the DataFrame.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The input DataFrame containing consumer complaint data.\n", + "\n", + " Returns:\n", + " - DataFrame: DataFrame with the 'Issue' column mapped to appropriate issues.\n", + " \"\"\"\n", + " # Create a dictionary mapping issues to sub-issues\n", + " issues_to_subissues = {}\n", + " for issue in df['Issue'].value_counts().index:\n", + " issues_to_subissues[issue] = list(df[df['Issue'] == issue]['Sub-issue'].value_counts().to_dict().keys())\n", + "\n", + " # Separate issues with only one sub-issue and more than one sub-issue\n", + " one_subissue = {key: value for key, value in issues_to_subissues.items() if len(issues_to_subissues[key]) == 1}\n", + " more_than_one_subissue = {key: value for key, value in issues_to_subissues.items() if len(issues_to_subissues[key]) > 1}\n", + "\n", + " # Existing issue mapping for issues with more than one sub-issue\n", + " existing_issue_mapping = {issue: issue for issue in more_than_one_subissue}\n", + "\n", + " # Issue renaming based on provided mapping\n", + " issue_renaming = {\n", + " 'Problem with a lender or other company charging your account': 'Account Operations and Unauthorized Transaction Issues',\n", + " 'Opening an account': 'Account Operations and Unauthorized Transaction Issues',\n", + " 'Getting a credit card': 'Account Operations and Unauthorized Transaction Issues',\n", + "\n", + " 'Unable to get your credit report or credit score': 'Credit Report and Monitoring Issues',\n", + " 'Credit monitoring or identity theft protection services': 'Credit Report and Monitoring Issues',\n", + " 'Identity theft protection or other monitoring services': 'Credit Report and Monitoring Issues',\n", + "\n", + " 'Problem caused by your funds being low': 'Payment and Funds Management',\n", + " 'Problem when making payments': 'Payment and Funds Management',\n", + " 'Managing the loan or lease': 'Payment and Funds Management',\n", + "\n", + " 'False statements or representation': 'Disputes and Misrepresentations',\n", + " 'Fees or interest': 'Disputes and Misrepresentations',\n", + " 'Other features, terms, or problems': 'Disputes and Misrepresentations',\n", + "\n", + " 'Took or threatened to take negative or legal action': 'Legal and Threat Actions'\n", + " }\n", + "\n", + " # Combine issue renaming and existing issue mapping\n", + " issues_mapping = {**issue_renaming, **existing_issue_mapping}\n", + "\n", + " # Map 'Issue' column using the defined mapping dictionary\n", + " df['Issue'] = df['Issue'].apply(lambda x: issues_mapping[x])\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b8a50ac3-1a5b-4c78-92bb-da14d38a679c", + "metadata": {}, + "outputs": [], + "source": [ + "def split_and_save_data(df,year, test_size=0.25, random_state=42, directory_to_save='./data_splits/'):\n", + " \"\"\"\n", + " Split the input DataFrame into train and test sets, and save them as CSV files with the specified year included in the file names.\n", + "\n", + " Parameters:\n", + " - df (DataFrame): The input DataFrame containing consumer complaint data.\n", + " - year (int): The year associated with the data split.\n", + " - test_size (float, optional): The proportion of the dataset to include in the test split. Default is 0.25.\n", + " - random_state (int, optional): The seed used by the random number generator. Default is 42.\n", + " - directory_to_save (str, optional): The directory path to save the data splits. Default is './data_splits/'.\n", + "\n", + " Returns:\n", + " - None\n", + " \"\"\"\n", + " # Split the data into train and test sets\n", + " X = df['Consumer complaint narrative']\n", + " y = df[['Product', 'Sub-product', 'Issue', 'Sub-issue']]\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y['Product'], test_size=test_size, random_state=random_state)\n", + "\n", + " # Concatenate X_train and y_train, and X_test and y_test respectively\n", + " train_df = pd.concat([X_train, y_train], axis=1).reset_index(drop=True)\n", + " test_df = pd.concat([X_test, y_test], axis=1).reset_index(drop=True)\n", + "\n", + " # Create directory if it doesn't exist\n", + " if not os.path.exists(directory_to_save):\n", + " os.makedirs(directory_to_save)\n", + " \n", + " # Save train and test data as CSV files with the year included in the file names\n", + " train_df.to_csv(os.path.join(directory_to_save, f'train-data-split_{year}.csv'), index=False)\n", + " test_df.to_csv(os.path.join(directory_to_save, f'test-data-split_{year}.csv'), index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "a11fbb2c-548e-4bf8-bb41-abe22a4bd485", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### Main Cleaning Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "479d2872-0202-4eac-85d9-ce2b532881dd", + "metadata": {}, + "outputs": [], + "source": [ + "def main(file_path, year,year_name):\n", + " # Load and clean the data\n", + " df_cleaned = load_and_clean_data(file_path)\n", + " \n", + " # Filter the data by years\n", + " df_filtered = filter_by_years(df_cleaned, year)\n", + " \n", + " # Map the 'Product' column\n", + " df_mapped = map_product_column(df_filtered)\n", + " \n", + " # Clean the customer narratives in the data\n", + " df_clean_narrative = clean_narrative(df_mapped)\n", + "\n", + " # Clean the data by frequency\n", + " df_freq = filter_by_frequency(df_clean_narrative)\n", + "\n", + " #Mapping the Issues and filtering Sub Issues\n", + " df_final = map_issue_to_subissue(df_freq)\n", + " \n", + " # Split and save the data\n", + " split_and_save_data(df_final,year_name)\n", + " return df_final" + ] + }, + { + "cell_type": "markdown", + "id": "60c6f92a-de2b-4c8a-817e-fb49a68f87ba", + "metadata": {}, + "source": [ + "### Calling the data preprocessing script" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "76932edd-4de8-47e1-a1a1-3cc4b07d6850", + "metadata": {}, + "outputs": [], + "source": [ + "file_path = 'complaints.csv'\n", + "years_to_include = [2023]\n", + "year_name=2023\n", + "df_final=main(file_path, years_to_include,year_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b55f4b09-6c89-470b-a4f0-60c0148534ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(247517, 11)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ed38bd8a-2004-435a-87b2-2bcc43dbd565", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Credit Reporting 211695\n", + "Checking or savings account 12285\n", + "Credit/Prepaid Card 11975\n", + "Debt collection 9380\n", + "Loans / Mortgage 2182\n", + "Name: Product, dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final.Product.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "dab29296-8a5b-44c3-8c6e-7b60e062343f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Credit reporting 210735\n", + "General-purpose credit card or charge card 10668\n", + "Checking account 10409\n", + "Other debt 3041\n", + "I do not know 2316\n", + "Credit card debt 1652\n", + "Federal student loan servicing 1344\n", + "Store credit card 1307\n", + "Medical debt 1053\n", + "Savings account 989\n", + "Other personal consumer report 960\n", + "Loan 732\n", + "Other banking product or service 725\n", + "Auto debt 581\n", + "Telecommunications debt 419\n", + "Rental debt 179\n", + "CD (Certificate of Deposit) 162\n", + "Mortgage debt 139\n", + "Conventional home mortgage 106\n", + "Name: Sub-product, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final['Sub-product'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "10127c9e-ea4a-49ce-a5f2-76991abde850", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Incorrect information on your report 87200\n", + "Improper use of your report 61868\n", + "Problem with a credit reporting company's investigation into an existing problem 45371\n", + "Problem with a company's investigation into an existing problem 20985\n", + "Managing an account 7367\n", + "Attempts to collect debt not owed 5453\n", + "Problem with a purchase shown on your statement 3253\n", + "Account Operations and Unauthorized Transaction Issues 2450\n", + "Written notification about debt 2404\n", + "Disputes and Misrepresentations 2311\n", + "Payment and Funds Management 2259\n", + "Closing an account 1975\n", + "Credit Report and Monitoring Issues 1630\n", + "Dealing with your lender or servicer 1293\n", + "Closing your account 813\n", + "Legal and Threat Actions 662\n", + "Problem with a company's investigation into an existing issue 223\n", + "Name: Issue, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final['Issue'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "e774f715-6d3d-4035-9f28-753801490d13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Information belongs to someone else 57850\n", + "Reporting company used your report improperly 48732\n", + "Their investigation did not fix an error on your report 45395\n", + "Credit inquiries on your report that you don't recognize 13136\n", + "Account status incorrect 10208\n", + "Account information incorrect 9267\n", + "Was not notified of investigation status or results 9200\n", + "Investigation took more than 30 days 8928\n", + "Personal information incorrect 5900\n", + "Debt is not yours 2785\n", + "Deposits and withdrawals 2626\n", + "Credit card company isn't resolving a dispute about a purchase on your statement 2289\n", + "Didn't receive enough information to verify debt 1777\n", + "Debt was result of identity theft 1727\n", + "Old information reappears or never goes away 1714\n", + "Difficulty submitting a dispute or getting information about a dispute over the phone 1704\n", + "Company closed your account 1517\n", + "Problem using a debit or ATM card 1503\n", + "Public record information inaccurate 1384\n", + "Transaction was not authorized 1378\n", + "Problem with personal statement of dispute 1352\n", + "Other problem getting your report or credit score 1109\n", + "Card was charged for something you did not purchase with the card 964\n", + "Banking errors 958\n", + "Funds not handled or disbursed as instructed 955\n", + "Overdrafts and overdraft fees 951\n", + "Debt was paid 941\n", + "Information is missing that should be on the report 877\n", + "Attempted to collect wrong amount 861\n", + "Problem during payment process 840\n", + "Fee problem 764\n", + "Problem with fees 749\n", + "Other problem 701\n", + "Received bad information about your loan 677\n", + "Funds not received from closed account 673\n", + "Threatened or suggested your credit would be damaged 662\n", + "Didn't receive notice of right to dispute 627\n", + "Trouble with how payments are being handled 616\n", + "Can't close your account 598\n", + "Problem accessing account 561\n", + "Account opened as a result of fraud 561\n", + "Problem canceling credit monitoring or identify theft protection service 521\n", + "Card opened as result of identity theft or fraud 511\n", + "Billing problem 468\n", + "Name: Sub-issue, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final['Sub-issue'].value_counts()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data_preprocessing_scripts/data_splits/test-data-split_2022.csv b/data_preprocessing_scripts/data_splits/test-data-split_2022.csv new file mode 100644 index 0000000000000000000000000000000000000000..37faa264a7e4f072f227798d3f876f3f9c5f6f59 --- /dev/null +++ b/data_preprocessing_scripts/data_splits/test-data-split_2022.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c203087801339af843b99a42a2187585d4af3445aab36c59c7cd7653b87587e +size 62357439 diff --git a/data_preprocessing_scripts/data_splits/test-data-split_2023.csv b/data_preprocessing_scripts/data_splits/test-data-split_2023.csv new file mode 100644 index 0000000000000000000000000000000000000000..369bf3608f7b8b317aa50ce59df2c3007493ea8b --- /dev/null +++ b/data_preprocessing_scripts/data_splits/test-data-split_2023.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a253ea22f1b90530a803e61f1624d3b270ef62c88b89aece879cbe793e64eea +size 68758811 diff --git a/data_preprocessing_scripts/data_splits/train-data-balanced.csv b/data_preprocessing_scripts/data_splits/train-data-balanced.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ab81ae007ab9d01fe286d644b8d0f54530f330e --- /dev/null +++ b/data_preprocessing_scripts/data_splits/train-data-balanced.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acbfac231d968f341f65ce49dd5369f75fea7ab792e29c6116dfafe113589186 +size 70018604 diff --git a/data_preprocessing_scripts/data_splits/train-data-split_2022.csv b/data_preprocessing_scripts/data_splits/train-data-split_2022.csv new file mode 100644 index 0000000000000000000000000000000000000000..c6754280d92879c91ad83c23a8da9da14354aa73 --- /dev/null +++ b/data_preprocessing_scripts/data_splits/train-data-split_2022.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dac829454c1da6425cfa6d07ad668e2150d4854b6a470c413bb267f7f0f51d +size 185727392 diff --git a/data_preprocessing_scripts/data_splits/train-data-split_2023.csv b/data_preprocessing_scripts/data_splits/train-data-split_2023.csv new file mode 100644 index 0000000000000000000000000000000000000000..668f6a708e159c81915ab75df7ed146632f859b7 --- /dev/null +++ b/data_preprocessing_scripts/data_splits/train-data-split_2023.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0cab77284db3c21e6d186d5a2d46ba27557a3320de250c19784822c73fc443 +size 205853579 diff --git a/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_data.csv b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..63c47e9b3e44001ba95b3c0fcd9c4c9d7f249951 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204e335acf3df6981acc48e8aa845b8f2f1ca356d961f99f3e973aca273599bc +size 2090623 diff --git a/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_train_data.csv b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..33c8f7b5d3ab787cfeb8cc6f0420309cf1178343 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f00513b1d44dbc7190ff68aa135655fdd22fe4b38bbe042a7713e68fc85ed8 +size 1883764 diff --git a/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_val_data.csv b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..9df011d0825be15306a82932f7a1f7d91b6dba7f --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/account_operations_and_unauthorized_transaction_issues_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e46867c68fee83e0994f36aa6e39be17a2e5e67eb05f17378aedc7bc01350ce +size 206915 diff --git a/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_data.csv b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..4b98a8aa70d1867d98127185f75a856e62036dde --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126f320e8e8df97c756c507ff4eb5c669f6058c92a3ec25f52c5f7b81599719b +size 4135658 diff --git a/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_train_data.csv b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..90adb04335c226e16744e18430f2de08d76f8674 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96f4e5342aa61f35cdd4eb42bd81a092fc25bc168f70e1f7be3322b8ea64cc4 +size 3718608 diff --git a/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_val_data.csv b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..adf76734047fa61e4f69afaa1da4489639d70291 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/attempts_to_collect_debt_not_owed_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036e70ce151a32c8a6757c2abef7f2547542d4c11c6541c6b6d705bd0063fd85 +size 417106 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_an_account_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_an_account_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..3bc9f0cf0c103f5d1b1e4b44e82282f0dadd8e4d --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_an_account_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84dbff3ce91e5c3433d3a0ae1cf543671c4025c2a9c3b9bcb40656975e17852d +size 1799450 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_an_account_train_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_an_account_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..981c83b06cd8906301942ebef85bdfc9d03ee34d --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_an_account_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216a3640f091ef01b069809e4895528912bc2665358971d0eb30eee103892a68 +size 1587242 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_an_account_val_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_an_account_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..7957f22325a2e81345b7e6d354cf1a3718ced30c --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_an_account_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704275d826033848072c0d747e2595745185cd385c4a65b4b34cb46b849eb388 +size 212264 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_your_account_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_your_account_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..c7b8258408af0450c45a0967cb1bbe9f73f600fe --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_your_account_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8428ebcc053869342b7343c709c2eef18d1a88973601dd4cead6141034c0ff2 +size 799995 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_your_account_train_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_your_account_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..c3fc4a6a40ed14bc609925e9e3a37d51e5119177 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_your_account_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4de5779cddd84b99a6695dca6e58d36ef93cd5d670fbee821a3b9ff2b39fc8 +size 736684 diff --git a/data_preprocessing_scripts/issue_data_splits/closing_your_account_val_data.csv b/data_preprocessing_scripts/issue_data_splits/closing_your_account_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..28b0534765d2f4999660c8c2cddf885c69040acf --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/closing_your_account_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e2e7589b709c008ff61ee8a2c5ac805462c4b0028074c5abb0e6c824437903 +size 63367 diff --git a/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_data.csv b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..e7a0fb7a292a8b60247f9f40d2d2a0f511dad798 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7029133836244969f80daa7f682a53951e7bfdd1fc304ccb65dcd4f126adc526 +size 2353757 diff --git a/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_train_data.csv b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..3c28fd3d5fdcb63e627762eae6a82d76de411f5d --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791d59058ebd88fec2f991f66b1e4fd557322179b4435215dcfcbc33f8984429 +size 2115089 diff --git a/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_val_data.csv b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..fa6b685597af6d9e376bc6d5ba8dce1ba5e2299f --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/credit_report_and_monitoring_issues_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fbdf8fca538f24db0dc091d8f39a155a1e2952fdf2b96a62b9317d7dab7368 +size 238724 diff --git a/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_data.csv b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..99022fd984551dc0455390e93bbe966bf84f03d7 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65c17415c7deba2c51c98fe5a96e563aff73cb9d2354a3c318f9d04ddffe746 +size 1500034 diff --git a/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_train_data.csv b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..32a090699cfb6b2a1ad45c1d2fb4ece723c8f741 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:235e66bcf2e35fd99d858d301ffd7a2831b1f33c09098cd2fd503a6642a18048 +size 1370056 diff --git a/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_val_data.csv b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..a856c1a037d2e6b21b1c0ad0374bed7a4eeb15c9 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/dealing_with_your_lender_or_servicer_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8289ab48f58adcacb8a81078cb8c9e216d61dedb87b907ac5f850ea7244218 +size 130034 diff --git a/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_data.csv b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..0e297ef4a61ba7ad3cbc96e651d342995214126e --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa794e53b61d14225d6c03bc749fb83ad315ac5f485c4bcf946a6e0cc0fb34a +size 2207971 diff --git a/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_train_data.csv b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..8c5aefeb4a23f22919f8555e10dac46f5fc69243 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00081a0b4c6c429f141df193159b55c550c2f15007970a54c12b10e25166e970 +size 1981586 diff --git a/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_val_data.csv b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..28827525a8996e5b857bf45aeb08f81402370e6e --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/disputes_and_misrepresentations_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66990f81784bae88083bb6cc6fa0582d94c0410050d70c261b5561547d18492d +size 226441 diff --git a/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_data.csv b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..8b963938d9c4aafedbece75af727ffd46f34856e --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ced2ef476e18679dee486e7b81ad35e6d66a6eb7ad936b92efeb85d30e65c9 +size 51426802 diff --git a/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_train_data.csv b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ade8ee420e8af6cab09368adbf87d6d12065355 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2dde33856c2cb76ef0674aa869cd6ccc6b96b548bb283d0c39c010aca0f722a +size 46289254 diff --git a/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_val_data.csv b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..10fbacd510a13baf13d4c7a8cc56256142b51bc6 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/improper_use_of_your_report_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df20ab17ff8751b8251244684294af3ebc11979b2bc699d7e207abe2bc511942 +size 5137604 diff --git a/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_data.csv b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..57e78d3f1df8609107e443d5f2787425da102c14 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c557f260383639a5248caae11d252de71cd72e88a23e20c7d32c1f65a42232bb +size 67002391 diff --git a/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_train_data.csv b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..530bf47e69ffe3b2679dd063b5308e46dfc6459b --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a947015eb1b0d8a3958d70e6a86df6dd79436fe4bb42cf7dcae5238f5a1b95 +size 60306312 diff --git a/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_val_data.csv b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..4b4fb4f5cc3d259897fd181c58dd7ad3b5fb5898 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/incorrect_information_on_your_report_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7603a2b5d168bf2a6ee56dfaa5ad909a06e35ad345c5772344d6f77765ce96 +size 6696135 diff --git a/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_data.csv b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..b90c7b8c841b34fde2ea93ceb468e24ba5d30a99 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f9c755e499c83d36f8c5cb20d813cb5ac0db73d188c32a334ea9a47b6efb52 +size 612393 diff --git a/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_train_data.csv b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..f6ab6ffb9b3b8d2831d335436e472c2e0cb379ae --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd9fd5013473734be09aac6b9563f2f10fbea3156a72d548e42bbdfef5c891a +size 543877 diff --git a/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_val_data.csv b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ec576628982a91016368ffeffce8f4b6d614f78 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/legal_and_threat_actions_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7623b91317d2f8746f340422183fd089a3b0ace3647bdf3b71692928dbf5e3a9 +size 68572 diff --git a/data_preprocessing_scripts/issue_data_splits/managing_an_account_data.csv b/data_preprocessing_scripts/issue_data_splits/managing_an_account_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..58e902b1893737f38fab790e1b7145c8ee28a2c5 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/managing_an_account_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46c2f03a742087ee42a05c22444648e56ad25bf551d19e28bd09b06a3425ef7 +size 7169538 diff --git a/data_preprocessing_scripts/issue_data_splits/managing_an_account_train_data.csv b/data_preprocessing_scripts/issue_data_splits/managing_an_account_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1b50a7f4fd7401f58680c930c77937b956be1c4 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/managing_an_account_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d0d485f89d83edb41c61262e2fe5963bfb9100f6f54ef6b16788dc967326ff +size 6420679 diff --git a/data_preprocessing_scripts/issue_data_splits/managing_an_account_val_data.csv b/data_preprocessing_scripts/issue_data_splits/managing_an_account_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..9aa642a28a814ff7437cbb4a92a5328b7b0f2803 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/managing_an_account_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9882076a622b2c2bc1399fdb27b4ca396c6d6cd7623da4ac41af0456471261 +size 748915 diff --git a/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_data.csv b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..8954e1f3f8282d193dd1461e8b52c3de4c91bdf0 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15719bc2484dc054a68e9c7d38bc110aedf6fa82962058088f8f2c1dc9d2384f +size 1854714 diff --git a/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_train_data.csv b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..a47f1246efe3ab818c4f330723c52be5dce0b044 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d12880274bfdccc9e99f2b973c5f7bf7239e2e8fdda2e08833d1a68a013c3b0 +size 1680205 diff --git a/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_val_data.csv b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..01d1c1be708034701ecb5ee97b7ca84e0d5d8aac --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/payment_and_funds_management_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec358880a0bc9e79f74843b91e0607e5ed2930831715a539231a0b28d9158681 +size 174565 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..1afeea6916d103307dce4dbe1feda855d3edefab --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51956f542a4924d8e6f5c363210406a4ccfa553f5703bdb61b9c252d3c74198b +size 320657 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_train_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..389a55b549eda4fdcb1eb94445f566c61b6cefee --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1011d1017bebc41a4df1217ee9d99604c9e1aa1e11f4f386fb34896383e8dca +size 284289 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_val_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..e686e3881edca7e2e35e425742e1e2338b834a3f --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_issue_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f7f02d80205cf532ebbd6307e849f71eaeab724e0c5c94cf7de9cca4397d32 +size 36424 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..2a0b999a608562932fde2f0c12919cd83b53555a --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf2935aed3387bc5bcf55dc3afaf29794eb8f4b084ec843b092da96b2c7590c +size 15288595 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_train_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..fb93852b7b8441b6a25a5e85978f27cb2959ff35 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ae7dd9f926f0cce7be50f1a21327b4bd2d416edd2524b87a1a5617457447ef +size 13714937 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_val_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..ca49bf05f68868a6a7a07b5ad9a24d92d8a252e7 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_company's_investigation_into_an_existing_problem_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff329c35b163e0ff2d3e868cc41a1046f045903ae0fede9cd341655f906b310 +size 1573714 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..48833c83f6b8591e2bd55f728188858788d02c31 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3324a8147b120f23f086f69a7a3ccea1b893ec6769ee470e3121dfe9bec059a0 +size 35696420 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_train_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..a67baa89e9f0667173498821df97b1f52bbedc85 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de949045217b229aa1f6d4d08c1478ed0123b227b8e1257a10693702d0495b63 +size 32014246 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_val_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..3f1722203e28ced1340aa9479818b17a57d79cda --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_credit_reporting_company's_investigation_into_an_existing_problem_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc5687fb7487d19b7564c01d8e04f532a5ccef8d15823fa0a3810e2bacce358 +size 3682230 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..56556c231fd481cd07b77f55701bb48894cc47a3 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73dcd8a78c7ef771476abfb80781f75c2f026ce225a01f6f5cb0432a603a9fb9 +size 3979084 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_train_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..1d778751fa91cde9e820e20776e869c7407bb4a8 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18355a23b3e5334b20f480eba9cb7c8c77d24204994265d7d3f8454d4c73eba8 +size 3598887 diff --git a/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_val_data.csv b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..346900da6e71c54cd09e54acc6db494a914510d9 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/problem_with_a_purchase_shown_on_your_statement_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64446c027f94d5bca528418abd659b7c395e92c88a221ca2f9273918ea6b76e3 +size 380253 diff --git a/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_data.csv b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..98dc201703412c3cab4162f4ae4abd03067d63e0 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63dec431cc39d888a2eb9cad96e7aed85b95e6ffb2cfa4db72e4743faa81bcad +size 2146333 diff --git a/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_train_data.csv b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..ed63489bb7e9655a1d8f14aea9bab08ee41ebd4e --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e999a32129dfb7bb0c77bae23e69d5850c1bfd7fbb7384ed8c4736e9f5ecf431 +size 1905618 diff --git a/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_val_data.csv b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..2d476d26510e0b7474cc4212ffe3df4eca70b7b6 --- /dev/null +++ b/data_preprocessing_scripts/issue_data_splits/written_notification_about_debt_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6079c511b80dd626dd5e46c98e2deeec01b58bf1c9f554b79fa7542424c1f9e0 +size 240771 diff --git a/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_data.csv b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ace02e09b3c00633b42f661b931bb8e221c1f23 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251984c0bfbec01f8f175faa56b5f1eeffecb49f44e389df8b7668bb8b8b0b07 +size 23045573 diff --git a/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_train_data.csv b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..e381c48513f2b577ee5c9d08de1c8e9c71101ea0 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80ca0bac655c146a81ed2f675595e52bd6ed5303801d7654082c7711526a9ed +size 20748035 diff --git a/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_val_data.csv b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ce5b62c042746e6f7156d95783467ca4758f068 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/checking_or_savings_account_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ec8c6d037697f9658b792088bb018cfca19595fe95b319bfbcefba32a844f8 +size 2297587 diff --git a/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_data.csv b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..bf1757e8e92478c2ff06e0eb686e51835d351ddd --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb42d621ea7e61ed79825b9e0636211986e4c0eeb64c70c820fc34c6e479205 +size 19453294 diff --git a/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_train_data.csv b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..95321c886b763f8d1a9d569d2ea1be009c143342 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af8394aa296bed661a9c4d2a8a090c0766e13454756742def339b60866ec7f8 +size 17522920 diff --git a/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_val_data.csv b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8551436b10870517781ae7b66498b003e8219ce --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_prepaid_card_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff114cfb11b9f79cf28baf29bc30eb8b5321c243e380b218316fbf4fa9d7798 +size 1930423 diff --git a/data_preprocessing_scripts/product_data_splits/credit_reporting_data.csv b/data_preprocessing_scripts/product_data_splits/credit_reporting_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..f4b231d005f72ab5fd76694249522a540e5426e3 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_reporting_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480a67a2f19c4e28df288fd343d130adf66b53a9368253cf9674e7b5e057dbef +size 15932699 diff --git a/data_preprocessing_scripts/product_data_splits/credit_reporting_train_data.csv b/data_preprocessing_scripts/product_data_splits/credit_reporting_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..60fc30cdc63f83f88a48d66079fbd4bc86ba5533 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_reporting_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6971f096c555a914cab7afbfd6baf33469030a36101157c296b72dbf479054cd +size 14446123 diff --git a/data_preprocessing_scripts/product_data_splits/credit_reporting_val_data.csv b/data_preprocessing_scripts/product_data_splits/credit_reporting_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..14541d3e83f41b653a8060e1735d19f83faaef29 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/credit_reporting_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb45cab1fba415eacba26cb1c6eddc789495ce69571763805a6ef69883a06c6b +size 1486625 diff --git a/data_preprocessing_scripts/product_data_splits/debt_collection_data.csv b/data_preprocessing_scripts/product_data_splits/debt_collection_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..0a6307104e2e95c66c721aa05c7dbbae1ecd3079 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/debt_collection_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2667849dbd4c64323dcf0d4df178f2fe2f31e5a63b2d0ab19b3cb2463a0b05e +size 7470284 diff --git a/data_preprocessing_scripts/product_data_splits/debt_collection_train_data.csv b/data_preprocessing_scripts/product_data_splits/debt_collection_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..77cb327700b53e3efc3b45558c69942a9144c30f --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/debt_collection_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b0567681c1c73cf2c97f783f3b6d500a78b3d0a624f5fe8ca2388b4ac5c847 +size 6729685 diff --git a/data_preprocessing_scripts/product_data_splits/debt_collection_val_data.csv b/data_preprocessing_scripts/product_data_splits/debt_collection_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..85e522bde1f5492c5902f68c0620f7955b071df5 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/debt_collection_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c7c312c615e1824ee2ff447762828b24e6d6d96fd7f2a48f7f448a7f3cb33c +size 740648 diff --git a/data_preprocessing_scripts/product_data_splits/loans___mortgage_data.csv b/data_preprocessing_scripts/product_data_splits/loans___mortgage_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..e4c79d26dfa927132dcf901d1ca9473b81eabdb7 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/loans___mortgage_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76e0ba2330ae8666d12ead1bf3201853de1148a6d1b6d2178cbcb98dc23e2a7 +size 4116950 diff --git a/data_preprocessing_scripts/product_data_splits/loans___mortgage_train_data.csv b/data_preprocessing_scripts/product_data_splits/loans___mortgage_train_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..f98b4e91c3eab422e57335e642f29661c8f40095 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/loans___mortgage_train_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7944e7594cd544c6127fe458abe6fa2496bd269de1872a556b806d757f02874d +size 3699961 diff --git a/data_preprocessing_scripts/product_data_splits/loans___mortgage_val_data.csv b/data_preprocessing_scripts/product_data_splits/loans___mortgage_val_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..f631b753313444468f59ee0b501935995f371ab2 --- /dev/null +++ b/data_preprocessing_scripts/product_data_splits/loans___mortgage_val_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0903996e37c8eac5cc05502863b154deb41de6ce4921282d8ecba001326962 +size 417038