save processed results to database #8
by praneethys · opened
app/categorization/file_processing.py  CHANGED

@@ -11,6 +11,9 @@ from dateparser import parse
 
 from app.categorization.categorizer_list import categorize_list
 from app.categorization.config import RESULT_OUTPUT_FILE, CATEGORY_REFERENCE_OUTPUT_FILE
+from app.model.transaction import Transaction
+from app.schema.index import TransactionCreate
+
 
 # Read file and process it (e.g. categorize transactions)
 async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
@@ -22,26 +25,25 @@ async def process_file(file_path: str) -> Dict[str, Union[str, pd.DataFrame]]:
 
     Returns:
         Dict[str, Union[str, pd.DataFrame]]: Dictionary containing the file name, processed output, and error information if any
+    """
 
     file_name = os.path.basename(file_path)
+    result = {"file_name": file_name, "output": pd.DataFrame(), "error": ""}
     try:
+        # Read file into standardized tx format: source, date, type, category, description, amount
         tx_list = standardize_csv_file(file_path)
 
         # Categorize transactions
+        result["output"] = await categorize_list(tx_list)
+        print(f"File processed successfully: {file_name}")
 
     except Exception as e:
         # Return an error indicator and exception info
         logging.log(logging.ERROR, f"| File: {file_name} | Unexpected Error: {e}")
+        print(f"ERROR processing file {file_name}: {e}")
+        result["error"] = str(e)
 
+    return result
 
 
 def standardize_csv_file(file_path: str) -> pd.DataFrame:
@@ -55,21 +57,21 @@
         pd.DataFrame: Prepared transaction data.
     """
 
+    tx_list = pd.read_csv(file_path, index_col=False)
+    tx_list.attrs["file_name"] = file_path
     tx_list.columns = tx_list.columns.str.lower().str.strip()
 
     # Standardize dates to YYYY/MM/DD format
+    tx_list["date"] = pd.to_datetime(tx_list["date"]).dt.strftime("%Y/%m/%d")
 
     # Add source and reindex to desired tx format; category column is new and therefore empty
+    tx_list.loc[:, "source"] = os.path.basename(file_path)
+    tx_list = tx_list.reindex(columns=["date", "expense/income", "category", "name/description", "amount"])
 
     return tx_list
 
 
-def save_results(results: List) -> None:
+async def save_results(results: List) -> None:
     """
     Merge all interim results in the input folder and write the merged results to the output file.
 
@@ -87,29 +89,33 @@ def save_results(results: List) -> None:
     ko_files = []
     error_messages = []
 
+    col_list = ["transaction_date", "type", "category", "name_description", "amount"]
     tx_list = pd.DataFrame(columns=col_list)
     for result in results:
+        if not result["error"]:
+            ok_files.append(result["file_name"])
+            result_df = result["output"]
             result_df.columns = col_list
             tx_list = pd.concat([tx_list, result_df], ignore_index=True)
         else:
+            ko_files.append(result["file_name"])
+            error_messages.append(f"{result['file_name']}: {result['error']}")
 
+    # Save to database
+    # FIXME: get user_id from session
+    txn_list_to_save = [TransactionCreate(**row.to_dict(), user_id=1) for _, row in tx_list.iterrows()]
+    await Transaction.bulk_create(txn_list_to_save)
 
+    new_ref_data = tx_list[["name/description", "category"]]
     if os.path.exists(CATEGORY_REFERENCE_OUTPUT_FILE):
        # If it exists, add master file to interim results
+        old_ref_data = pd.read_csv(CATEGORY_REFERENCE_OUTPUT_FILE, names=["name/description", "category"], header=0)
         new_ref_data = pd.concat([old_ref_data, new_ref_data], ignore_index=True)
+
     # Drop duplicates, sort, and write to create new Master File
+    new_ref_data.drop_duplicates(subset=["name/description"]).sort_values(by=["name/description"]).to_csv(
+        CATEGORY_REFERENCE_OUTPUT_FILE, mode="w", index=False, header=True
+    )
 
     # Summarize results
     print(f"\nProcessed {len(results)} files: {len(ok_files)} successful, {len(ko_files)} with errors\n")
@@ -117,4 +123,4 @@ def save_results(results: List) -> None:
     print(f"Errors in the following files:")
     for message in error_messages:
         print(f" {message}")
+    print("\n")
app/model/transaction.py  CHANGED

@@ -7,6 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.model.base import BaseModel
 from app.engine.postgresdb import Base
+from app.schema.index import TransactionCreate
 
 
 class Transaction(Base, BaseModel):
@@ -23,11 +24,17 @@
     @classmethod
     async def create(cls: "type[Transaction]", db: AsyncSession, **kwargs) -> "Transaction":
         query = sql.insert(cls).values(**kwargs).returning(cls.id)
+        transactions = await db.execute(query)
         transaction = transactions.first()
         await db.commit()
         return transaction
 
+    @classmethod
+    async def bulk_create(cls: "type[Transaction]", db: AsyncSession, transactions: List[TransactionCreate]) -> None:
+        query = sql.insert(cls).values(transactions)
+        await db.execute(query)
+        await db.commit()
+
     @classmethod
     async def update(cls: "type[Transaction]", db: AsyncSession, id: int, **kwargs) -> "Transaction":
         query = sql.update(cls).where(cls.id == id).values(**kwargs).execution_options(synchronize_session="fetch")
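
SQLAlchemy's insert().values() performs an executemany-style bulk insert when handed a list of plain dicts, so the Pydantic models in transactions would normally be converted first. A minimal sketch of that variant, assuming Pydantic v2 (v1 would use .dict()):

@classmethod
async def bulk_create(cls: "type[Transaction]", db: AsyncSession, transactions: List[TransactionCreate]) -> None:
    # values() binds a list of plain dicts; model_dump() assumes Pydantic v2.
    payload = [t.model_dump() for t in transactions]
    await db.execute(sql.insert(cls).values(payload))
    await db.commit()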
app/schema/index.py  CHANGED

@@ -45,5 +45,9 @@ class TransactionResponse(PydanticBaseModel):
     type: TransactionType
 
 
+class TransactionCreate(TransactionResponse):
+    user_id: int
+
+
 class Transaction(TransactionResponse):
     user: User
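
For context, save_results builds these objects from DataFrame rows. A sketch of a single instance follows; every field except user_id and type is an assumption inferred from col_list in file_processing.py, since TransactionResponse's full field list is not shown in this diff:

# Illustrative only: field names besides user_id and type are assumed.
row = TransactionCreate(
    transaction_date="2024/01/31",
    type=TransactionType.EXPENSE,  # enum member name is an assumption
    category="groceries",
    name_description="LOCAL MARKET",
    amount=42.50,
    user_id=1,  # hard-coded in the PR pending the FIXME for session-based user_id
)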
app/transactions_rag/categorize_transactions.ipynb  CHANGED

@@ -529,7 +529,7 @@
     "    print(\"\\nProcessing file\")\n",
     "    result = await asyncio.gather(processed_file)\n",
     "\n",
-    "    save_results(result)\n",
+    "    await save_results(result)\n",
     "    print(result)\n",
     "\n",
     "    output_file = open(CATEGORY_REFERENCE_OUTPUT_FILE, \"r+\")\n",
@@ -537,8 +537,7 @@
     "\n",
     "\n",
     "result = await apply_categorization()\n",
-    "print(result)\n",
-    "\n"
+    "print(result)\n"
    ]
   }
  ],
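
The added await matters here: save_results is now a coroutine, so calling it bare would only create a coroutine object and the database write would never run. Notebook-style usage after this change (top-level await works in Jupyter; the file path is illustrative):

import asyncio

processed_file = process_file("data/sample.csv")  # illustrative path
result = await asyncio.gather(processed_file)     # returns a list of per-file results
await save_results(result)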
|