# Spaces:
# Running
# Running
# config.py
from datetime import date
from typing import List

from pydantic import BaseModel
# Selects the OCR model.
# Options: "rapid", "easy", "ocrmac", "tesseract"
OCR_MODEL = "easy"

# Must be set when using the tesseract OCR model.
#   Linux:   "/usr/share/tesseract-ocr/4.00/tessdata"
#   Windows: "C:\\Program Files\\Tesseract-OCR\\tessdata"
#   Mac:     "/usr/local/share/tessdata" or "/opt/homebrew/share/tessdata"
TESSERACT_TESSDATA_LOCATION = "/usr/share/tesseract-ocr/4.00/tessdata"

# Ollama model identifier (presumably the model that receives LLM_PROMPT;
# verify against the caller).
OLLAMA_MODEL = "llama3:instruct"
# Instruction text for the LLM. The example is kept consistent with the
# instruction sentence above it: valid JSON, the list nested under the
# 'transactions' key, and transaction_type spelled exactly as one of the two
# allowed values ('deposit' or 'withdrawal').
LLM_PROMPT = """
Extract all transactions from the following statement. Each transaction must be returned as a JSON object with the fields: transaction_date (YYYY-MM-DD), description, amount, and transaction_type ('deposit' or 'withdrawal'). All of these must be returned as a list of JSON objects under a key called 'transactions'. Here is an example:
{
  "transactions": [
    {
      "transaction_date": "2025-01-24",
      "description": "Walmart",
      "amount": 34.24,
      "transaction_type": "withdrawal"
    }
  ]
}
"""
# Output format.
# Options: "csv", "json", "excel"
EXPORT_TYPE = "json"

# Can be a file or directory.
# Empty in this file; presumably must be filled in before running - verify
# against the code that reads it.
INPUT_PATH = ""

# Empty in this file; see the note on INPUT_PATH.
OUTPUT_FOLDER = ""
OUTPUT_FILE_NAME = "output"
# Define Pydantic response models for instructor:
class BankStatementEntry(BaseModel):
    """A single transaction entry of a bank statement.

    The field names are the same four keys that LLM_PROMPT asks the model to
    return for each transaction. Every field's annotation admits None.
    """

    # Accepts a date, None, or an arbitrary string (the annotation does not
    # restrict the string's format).
    transaction_date: date | None | str
    description: str | None
    amount: float | None
    # Plain string rather than an enumerated type: a stricter
    # Literal['deposit', 'withdrawal', None] annotation was left disabled here,
    # so values outside those two are not rejected by this model.
    transaction_type: str | None
class BankStatement(BaseModel):
    """Top-level response model holding a statement's transaction entries."""

    # A list of BankStatementEntry items, or None.
    transactions: List[BankStatementEntry] | None
# The model that LLM output will conform to
RESPONSE_MODEL = BankStatement