Spaces:
Sleeping
Sleeping
Sahil Garg
committed on
Commit
·
c00e175
1
Parent(s):
6611563
openrouter replaced by mistral
Browse files
- notes/llm_notes_generator.py +15 -17
- notes/utils/__init__.py +0 -0
- notes/utils/utils.py +0 -57
- notes/utils/utils_normalize.py +0 -60
notes/llm_notes_generator.py
CHANGED
|
@@ -34,8 +34,8 @@ logger = logging.getLogger(__name__)
|
|
| 34 |
|
| 35 |
class Settings(BaseSettings):
|
| 36 |
"""Application settings loaded from environment variables or .env file."""
|
| 37 |
-
|
| 38 |
-
api_url: str = "https://
|
| 39 |
output_dir: str = "data/generated_notes"
|
| 40 |
trial_balance_json: str = "data/output1/parsed_trial_balance.json"
|
| 41 |
|
|
@@ -61,22 +61,20 @@ class GeneratedNote(BaseModel):
|
|
| 61 |
|
| 62 |
class FlexibleFinancialNoteGenerator:
|
| 63 |
def __init__(self):
|
| 64 |
-
self.
|
| 65 |
-
if not self.
|
| 66 |
-
logger.error("
|
| 67 |
-
raise ValueError("
|
| 68 |
self.api_url = settings.api_url
|
| 69 |
self.headers = {
|
| 70 |
-
"Authorization": f"Bearer {self.
|
| 71 |
-
"Content-Type": "application/json"
|
| 72 |
-
"HTTP-Referer": "https://localhost:3000",
|
| 73 |
-
"X-Title": "Financial Note Generator"
|
| 74 |
}
|
| 75 |
self.note_templates = self.load_note_templates()
|
| 76 |
self.account_patterns = self._init_account_patterns()
|
| 77 |
self.recommended_models = [
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
]
|
| 81 |
|
| 82 |
def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
|
|
@@ -340,8 +338,8 @@ class FlexibleFinancialNoteGenerator:
|
|
| 340 |
|
| 341 |
return prompt
|
| 342 |
|
| 343 |
-
def
|
| 344 |
-
"""Make API call to
|
| 345 |
for model in self.recommended_models:
|
| 346 |
logger.info(f"Trying model: {model}")
|
| 347 |
payload = {
|
|
@@ -359,7 +357,7 @@ class FlexibleFinancialNoteGenerator:
|
|
| 359 |
self.api_url,
|
| 360 |
headers=self.headers,
|
| 361 |
json=payload,
|
| 362 |
-
timeout=30
|
| 363 |
)
|
| 364 |
response.raise_for_status()
|
| 365 |
result = response.json()
|
|
@@ -450,7 +448,7 @@ class FlexibleFinancialNoteGenerator:
|
|
| 450 |
logger.error("Failed to build prompt")
|
| 451 |
return False
|
| 452 |
|
| 453 |
-
response = self.
|
| 454 |
if not response:
|
| 455 |
logger.error("Failed to get API response")
|
| 456 |
return False
|
|
@@ -475,7 +473,7 @@ class FlexibleFinancialNoteGenerator:
|
|
| 475 |
if not prompt:
|
| 476 |
results[note_number] = False
|
| 477 |
continue
|
| 478 |
-
response = self.
|
| 479 |
if not response:
|
| 480 |
results[note_number] = False
|
| 481 |
continue
|
|
|
|
| 34 |
|
| 35 |
class Settings(BaseSettings):
|
| 36 |
"""Application settings loaded from environment variables or .env file."""
|
| 37 |
+
mistral_api_key: str = os.getenv('MISTRAL_API_KEY', '')
|
| 38 |
+
api_url: str = "https://api.mistral.ai/v1/chat/completions"
|
| 39 |
output_dir: str = "data/generated_notes"
|
| 40 |
trial_balance_json: str = "data/output1/parsed_trial_balance.json"
|
| 41 |
|
|
|
|
| 61 |
|
| 62 |
class FlexibleFinancialNoteGenerator:
|
| 63 |
def __init__(self):
|
| 64 |
+
self.mistral_api_key = settings.mistral_api_key
|
| 65 |
+
if not self.mistral_api_key:
|
| 66 |
+
logger.error("MISTRAL_API_KEY not found in .env file")
|
| 67 |
+
raise ValueError("MISTRAL_API_KEY not found in .env file")
|
| 68 |
self.api_url = settings.api_url
|
| 69 |
self.headers = {
|
| 70 |
+
"Authorization": f"Bearer {self.mistral_api_key}",
|
| 71 |
+
"Content-Type": "application/json"
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
self.note_templates = self.load_note_templates()
|
| 74 |
self.account_patterns = self._init_account_patterns()
|
| 75 |
self.recommended_models = [
|
| 76 |
+
"mistral-large-latest",
|
| 77 |
+
"mistral-medium-latest"
|
| 78 |
]
|
| 79 |
|
| 80 |
def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
|
|
|
|
| 338 |
|
| 339 |
return prompt
|
| 340 |
|
| 341 |
+
def call_mistral_api(self, prompt: str) -> Optional[str]:
|
| 342 |
+
"""Make API call to Mistral with model fallback"""
|
| 343 |
for model in self.recommended_models:
|
| 344 |
logger.info(f"Trying model: {model}")
|
| 345 |
payload = {
|
|
|
|
| 357 |
self.api_url,
|
| 358 |
headers=self.headers,
|
| 359 |
json=payload,
|
| 360 |
+
timeout=30
|
| 361 |
)
|
| 362 |
response.raise_for_status()
|
| 363 |
result = response.json()
|
|
|
|
| 448 |
logger.error("Failed to build prompt")
|
| 449 |
return False
|
| 450 |
|
| 451 |
+
response = self.call_mistral_api(prompt)
|
| 452 |
if not response:
|
| 453 |
logger.error("Failed to get API response")
|
| 454 |
return False
|
|
|
|
| 473 |
if not prompt:
|
| 474 |
results[note_number] = False
|
| 475 |
continue
|
| 476 |
+
response = self.call_mistral_api(prompt)
|
| 477 |
if not response:
|
| 478 |
results[note_number] = False
|
| 479 |
continue
|
notes/utils/__init__.py
DELETED
|
File without changes
|
notes/utils/utils.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
from typing import Any, Union
|
| 3 |
-
|
| 4 |
-
# Configure logging
|
| 5 |
-
logging.basicConfig(level=logging.INFO)
|
| 6 |
-
logger = logging.getLogger(__name__)
|
| 7 |
-
|
| 8 |
-
def clean_value(value: Union[str, float, int, None]) -> float:
    """
    Clean and convert a value to a finite float.

    Strings have thousands-separator commas removed and surrounding
    whitespace stripped before conversion.

    Args:
        value: A number, a numeric string (commas allowed), or None.

    Returns:
        The parsed float, or 0.0 when the value is empty/None, cannot be
        converted, or converts to NaN/infinity.
    """
    import math  # local import: the module header does not import math

    try:
        if isinstance(value, str):
            value = value.replace(',', '').strip()
        # Falsy inputs (None, "", 0) short-circuit to 0.0 without float().
        number = float(value) if value else 0.0
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        logger.debug("Could not clean value: %s", value)
        return 0.0
    # float() accepts NaN/inf (float("nan"), "nan", "inf"); letting them
    # through would propagate into every sum built from this value, so
    # they get the same 0.0 fallback as any other unusable input.
    return number if math.isfinite(number) else 0.0
|
| 21 |
-
|
| 22 |
-
def to_lakhs(value: Union[float, int, str]) -> float:
    """
    Convert an amount to lakhs (divide by 100,000), rounded to 2 decimals.

    Args:
        value: An int, a float, or a numeric string. Strings may contain
            thousands-separator commas and surrounding whitespace.

    Returns:
        The amount in lakhs, or 0.0 when the value cannot be converted or
        is NaN/infinity.
    """
    import math  # local import: the module header does not import math

    try:
        if isinstance(value, str):
            value = float(value.replace(',', '').strip())
        lakhs = round(float(value) / 100000, 2)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        logger.debug("Could not convert to lakhs: %s", value)
        return 0.0
    # round(nan, 2) and round(inf, 2) do not raise, so non-finite inputs
    # must be rejected explicitly to honour the 0.0 fallback.
    return lakhs if math.isfinite(lakhs) else 0.0
|
| 34 |
-
|
| 35 |
-
def convert_note_json_to_lakhs(note_json: Any) -> Any:
    """
    Recursively convert all numeric values in a note JSON to lakhs.

    Dicts and lists are walked and updated in place. Each dict value and
    each list element is handled the same way:

    * bool: left unchanged (bool is an int subclass, but a flag is not an
      amount).
    * int / float: converted with ``to_lakhs``.
    * str: converted with ``to_lakhs`` if it parses as a finite number
      (commas allowed); otherwise left unchanged.
    * dict / list: converted recursively.
    * anything else (e.g. None): left unchanged.

    Args:
        note_json: The note structure, normally a dict or list. Any other
            top-level value is returned untouched.

    Returns:
        The same object that was passed in, after in-place conversion.
    """
    import math  # local import: the module header does not import math

    def scale(item: Any) -> Any:
        # Must come before the int/float check: isinstance(True, int) is True.
        if isinstance(item, bool):
            return item
        if isinstance(item, (int, float)):
            return to_lakhs(item)
        if isinstance(item, str):
            try:
                number = float(item.replace(',', ''))
            except ValueError:
                return item
            # "nan" / "inf" / "infinity" parse as floats but are text,
            # not amounts; keep the original string.
            return to_lakhs(number) if math.isfinite(number) else item
        return convert(item)

    def convert(obj: Any) -> Any:
        if isinstance(obj, dict):
            # Only existing keys are reassigned, so the dict's size does
            # not change while it is being iterated.
            for key, item in obj.items():
                obj[key] = scale(item)
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = scale(item)
        return obj

    return convert(note_json)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notes/utils/utils_normalize.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
from typing import Any, Dict, List, Optional
|
| 3 |
-
from pydantic import BaseModel, ValidationError
|
| 4 |
-
|
| 5 |
-
# Configure logging
|
| 6 |
-
logging.basicConfig(level=logging.INFO)
|
| 7 |
-
logger = logging.getLogger(__name__)
|
| 8 |
-
|
| 9 |
-
class NormalizedNote(BaseModel):
    """
    Standard shape of a normalized financial note.

    Only ``table_data`` is required; every other field has a default.
    """

    # Identification. Explicit ``= None`` keeps these fields optional on
    # both pydantic v1 (where a bare Optional[...] already defaults to None)
    # and pydantic v2 (where a bare Optional[...] is a required field).
    note_number: Optional[str] = None
    note_title: Optional[str] = None
    full_title: Optional[str] = None

    # Table rows, one dict per row.
    table_data: List[Dict[str, Any]]

    # Supporting detail. pydantic copies these mutable defaults for each
    # instance, so they are not shared between models.
    breakdown: Dict[str, Any] = {}
    matched_accounts: List[Any] = []

    # Summary figures.
    total_amount: Optional[float] = None
    total_amount_lakhs: Optional[float] = None
    matched_accounts_count: Optional[int] = None

    # Additional content.
    comparative_data: Dict[str, Any] = {}
    notes_and_disclosures: List[str] = []
    markdown_content: Optional[str] = ""
|
| 22 |
-
|
| 23 |
-
def is_date_label(label: str) -> bool:
    """
    Check if a label is a date string.

    Two formats are recognised:

    * ``"<Month> D, YYYY"`` with a full English month name, e.g.
      ``"March 31, 2024"``.
    * ISO ``"YYYY-MM-DD"``, e.g. ``"2024-03-31"``.

    Args:
        label: The label text to test.

    Returns:
        True if the whole label matches one of the formats above,
        otherwise False.
    """
    import re

    # All twelve months: the list previously started at March, so January
    # and February dates were not recognised as date labels.
    long_form = (
        r"^(January|February|March|April|May|June|July|August|September"
        r"|October|November|December)\s+\d{1,2},\s+\d{4}$"
    )
    iso_form = r"^\d{4}-\d{2}-\d{2}$"
    return bool(re.match(long_form, label) or re.match(iso_form, label))
|
| 28 |
-
|
| 29 |
-
def normalize_llm_note_json(llm_json: Dict[str, Any]) -> Dict[str, Any]:
|
| 30 |
-
"""
|
| 31 |
-
Normalize a single LLM-generated note JSON to standard format.
|
| 32 |
-
Returns a dict compatible with NormalizedNote.
|
| 33 |
-
"""
|
| 34 |
-
note_number = llm_json.get("note_number") or llm_json.get("metadata", {}).get("note_number", "")
|
| 35 |
-
note_title = llm_json.get("note_title") or llm_json.get("title", "")
|
| 36 |
-
full_title = llm_json.get("full_title") or (f"{note_number}. {note_title}" if note_number else note_title)
|
| 37 |
-
|
| 38 |
-
table_data: List[Dict[str, Any]] = []
|
| 39 |
-
|
| 40 |
-
if "structure" in llm_json and llm_json["structure"]:
|
| 41 |
-
for item in llm_json["structure"]:
|
| 42 |
-
if "subcategories" in item and item["subcategories"]:
|
| 43 |
-
for sub in item["subcategories"]:
|
| 44 |
-
label = sub.get("label", "")
|
| 45 |
-
if not is_date_label(label):
|
| 46 |
-
row = {
|
| 47 |
-
"particulars": label,
|
| 48 |
-
"current_year": sub.get("value", ""),
|
| 49 |
-
"previous_year": sub.get("previous_value", "-"),
|
| 50 |
-
}
|
| 51 |
-
table_data.append(row)
|
| 52 |
-
if "category" in item and ("total" in item or "previous_total" in item):
|
| 53 |
-
row = {
|
| 54 |
-
"particulars": f"Total {item.get('category', '')}",
|
| 55 |
-
"current_year": item.get("total", ""),
|
| 56 |
-
"previous_year": item.get("previous_total", "-"),
|
| 57 |
-
}
|
| 58 |
-
table_data.append(row)
|
| 59 |
-
|
| 60 |
-
# Optionally, add a header row
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|