import os
import glob
import shutil
import logging
import warnings
from typing import Optional, Union, Dict, List
from datetime import datetime

import pandas as pd
from dateparser import parse

from app.categorization.categorizer_list import categorize_list
from app.categorization.config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE

# Read file and process it (e.g. categorize transactions)
async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
    """
    Standardize one input file and categorize its transactions.

    Any exception raised along the way is logged, echoed to the console and
    reported through the 'error' entry of the returned dictionary instead of
    being propagated to the caller.

    Args:
        file_path (str): Path to the input file.

    Returns:
        Dict[str, Union[str, pd.DataFrame]]: Dictionary with the keys
            'file_name' (base name of the input path), 'output' (the
            categorized transactions, or an empty DataFrame on failure) and
            'error' (the exception text, or an empty string on success).
    """

    file_name = os.path.basename(file_path)
    output = pd.DataFrame()
    error = ''

    try:
        # Load the file into the standard transaction layout, then categorize it
        transactions = standardize_csv_file(file_path)
        output = await categorize_list(transactions)
        print(f'File processed sucessfully: {file_name}')
    except Exception as exc:
        # Report the failure through the result instead of raising
        logging.error(f"| File: {file_name} | Unexpected Error: {exc}")
        print(f'ERROR processing file {file_name}: {exc}')
        error = str(exc)

    return {'file_name': file_name, 'output': output, 'error': error}


def standardize_csv_file(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file and convert it to the standard transaction layout.

    Column names are lower-cased and stripped of surrounding whitespace, the
    'date' column is rewritten as YYYY/MM/DD strings, and the frame is reduced
    to the columns 'date', 'expense/income', 'category', 'name/description'
    and 'amount' (in that order). Columns missing from the input (typically
    'category') are created and filled with NaN; any other input columns are
    dropped.

    Args:
        file_path (str): Path to the input file.

    Returns:
        pd.DataFrame: Prepared transaction data. The input path is stored in
            the frame's ``attrs['file_name']``.

    Raises:
        KeyError: If the file has no 'date' column.
    """

    standard_columns = ['date', 'expense/income', 'category', 'name/description', 'amount']

    tx_list = pd.read_csv(file_path, index_col=False)
    tx_list.columns = tx_list.columns.str.lower().str.strip()

    # Standardize dates to YYYY/MM/DD format
    tx_list['date'] = pd.to_datetime(tx_list['date']).dt.strftime('%Y/%m/%d')

    # Reduce to the standard layout. A 'source' column used to be added just
    # before this step, but it was never part of the reindexed columns and so
    # never reached the caller; that dead store has been removed.
    tx_list = tx_list.reindex(columns=standard_columns)

    # Attach the metadata to the final frame so it does not depend on pandas
    # propagating attrs through reindex.
    tx_list.attrs['file_name'] = file_path

    return tx_list


def save_results(results: List) -> None:
    """
    Merge the per-file results and write them to the output files.

    Successful results are appended to RESULT_OUTPUT_FILE, their
    description/category pairs are merged into CATEGORY_REFERENCE_OUTPUT_FILE
    (de-duplicated by description and sorted), and a summary including any
    per-file errors is printed to the console.

    Args:
        results (List): Result dictionaries with the keys 'file_name',
            'output' (a DataFrame with five columns) and 'error' (empty
            string on success, error text otherwise).

    Returns:
        None

    Raises:
        ValueError: If a successful result's output does not have exactly
            five columns (raised by pandas when the labels are assigned).
    """

    col_list = ['date', 'expense/income', 'category', 'name/description', 'amount']

    ok_files = []
    ko_files = []
    error_messages = []
    frames = []

    for result in results:
        if result['error']:
            ko_files.append(result['file_name'])
            error_messages.append(f"{result['file_name']}: {result['error']}")
            continue

        ok_files.append(result['file_name'])
        # Relabel a copy so the caller's DataFrame is left untouched
        frame = result['output'].copy()
        frame.columns = col_list
        frames.append(frame)

    # Concatenate once instead of growing a frame inside the loop
    if frames:
        tx_list = pd.concat(frames, ignore_index=True)
    else:
        tx_list = pd.DataFrame(columns=col_list)

    # Append to the result file; write the header only when the file is new or empty
    needs_header = (
        not os.path.exists(RESULT_OUTPUT_FILE)
        or os.path.getsize(RESULT_OUTPUT_FILE) == 0
    )
    tx_list.to_csv(RESULT_OUTPUT_FILE, mode="a", index=False, header=needs_header)

    new_ref_data = tx_list[['name/description', 'category']]
    if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
        # Existing reference rows go first so they win when duplicates are dropped
        old_ref_data = pd.read_csv(
            CATEGORY_REFERENCE_OUTPUT_FILE,
            names=['name/description', 'category'],
            header=0,
        )
        new_ref_data = pd.concat([old_ref_data, new_ref_data], ignore_index=True)

    # Drop duplicates, sort, and overwrite the reference file
    new_ref_data = new_ref_data.drop_duplicates(subset=['name/description'])
    new_ref_data = new_ref_data.sort_values(by=['name/description'])
    new_ref_data.to_csv(CATEGORY_REFERENCE_OUTPUT_FILE, mode="w", index=False, header=True)

    # Summarize results
    print(f"\nProcessed {len(results)} files: {len(ok_files)} successful, {len(ko_files)} with errors\n")
    if ko_files:
        print("Errors in the following files:")
        for message in error_messages:
            print(f"  {message}")
        print('\n')