Sahil Garg committed on
Commit
c00e175
·
1 Parent(s): 6611563

openrouter replaced by mistral

Browse files
notes/llm_notes_generator.py CHANGED
@@ -34,8 +34,8 @@ logger = logging.getLogger(__name__)
34
 
35
  class Settings(BaseSettings):
36
  """Application settings loaded from environment variables or .env file."""
37
- openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
38
- api_url: str = "https://openrouter.ai/api/v1/chat/completions"
39
  output_dir: str = "data/generated_notes"
40
  trial_balance_json: str = "data/output1/parsed_trial_balance.json"
41
 
@@ -61,22 +61,20 @@ class GeneratedNote(BaseModel):
61
 
62
  class FlexibleFinancialNoteGenerator:
63
  def __init__(self):
64
- self.openrouter_api_key = settings.openrouter_api_key
65
- if not self.openrouter_api_key:
66
- logger.error("OPENROUTER_API_KEY not found in .env file")
67
- raise ValueError("OPENROUTER_API_KEY not found in .env file")
68
  self.api_url = settings.api_url
69
  self.headers = {
70
- "Authorization": f"Bearer {self.openrouter_api_key}",
71
- "Content-Type": "application/json",
72
- "HTTP-Referer": "https://localhost:3000",
73
- "X-Title": "Financial Note Generator"
74
  }
75
  self.note_templates = self.load_note_templates()
76
  self.account_patterns = self._init_account_patterns()
77
  self.recommended_models = [
78
- "mistralai/mixtral-8x7b-instruct",
79
- "mistralai/mistral-7b-instruct-v0.2"
80
  ]
81
 
82
  def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
@@ -340,8 +338,8 @@ class FlexibleFinancialNoteGenerator:
340
 
341
  return prompt
342
 
343
- def call_openrouter_api(self, prompt: str) -> Optional[str]:
344
- """Make API call to OpenRouter with model fallback"""
345
  for model in self.recommended_models:
346
  logger.info(f"Trying model: {model}")
347
  payload = {
@@ -359,7 +357,7 @@ class FlexibleFinancialNoteGenerator:
359
  self.api_url,
360
  headers=self.headers,
361
  json=payload,
362
- timeout=30 # <-- Add timeout here!
363
  )
364
  response.raise_for_status()
365
  result = response.json()
@@ -450,7 +448,7 @@ class FlexibleFinancialNoteGenerator:
450
  logger.error("Failed to build prompt")
451
  return False
452
 
453
- response = self.call_openrouter_api(prompt)
454
  if not response:
455
  logger.error("Failed to get API response")
456
  return False
@@ -475,7 +473,7 @@ class FlexibleFinancialNoteGenerator:
475
  if not prompt:
476
  results[note_number] = False
477
  continue
478
- response = self.call_openrouter_api(prompt)
479
  if not response:
480
  results[note_number] = False
481
  continue
 
34
 
35
  class Settings(BaseSettings):
36
  """Application settings loaded from environment variables or .env file."""
37
+ mistral_api_key: str = os.getenv('MISTRAL_API_KEY', '')
38
+ api_url: str = "https://api.mistral.ai/v1/chat/completions"
39
  output_dir: str = "data/generated_notes"
40
  trial_balance_json: str = "data/output1/parsed_trial_balance.json"
41
 
 
61
 
62
  class FlexibleFinancialNoteGenerator:
63
  def __init__(self):
64
+ self.mistral_api_key = settings.mistral_api_key
65
+ if not self.mistral_api_key:
66
+ logger.error("MISTRAL_API_KEY not found in .env file")
67
+ raise ValueError("MISTRAL_API_KEY not found in .env file")
68
  self.api_url = settings.api_url
69
  self.headers = {
70
+ "Authorization": f"Bearer {self.mistral_api_key}",
71
+ "Content-Type": "application/json"
 
 
72
  }
73
  self.note_templates = self.load_note_templates()
74
  self.account_patterns = self._init_account_patterns()
75
  self.recommended_models = [
76
+ "mistral-large-latest",
77
+ "mistral-medium-latest"
78
  ]
79
 
80
  def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
 
338
 
339
  return prompt
340
 
341
+ def call_mistral_api(self, prompt: str) -> Optional[str]:
342
+ """Make API call to Mistral with model fallback"""
343
  for model in self.recommended_models:
344
  logger.info(f"Trying model: {model}")
345
  payload = {
 
357
  self.api_url,
358
  headers=self.headers,
359
  json=payload,
360
+ timeout=30
361
  )
362
  response.raise_for_status()
363
  result = response.json()
 
448
  logger.error("Failed to build prompt")
449
  return False
450
 
451
+ response = self.call_mistral_api(prompt)
452
  if not response:
453
  logger.error("Failed to get API response")
454
  return False
 
473
  if not prompt:
474
  results[note_number] = False
475
  continue
476
+ response = self.call_mistral_api(prompt)
477
  if not response:
478
  results[note_number] = False
479
  continue
notes/utils/__init__.py DELETED
File without changes
notes/utils/utils.py DELETED
@@ -1,57 +0,0 @@
1
- import logging
2
- from typing import Any, Union
3
-
4
- # Configure logging
5
- logging.basicConfig(level=logging.INFO)
6
- logger = logging.getLogger(__name__)
7
-
8
- def clean_value(value: Union[str, float, int, None]) -> float:
9
- """
10
- Clean and convert a value to float.
11
- Removes commas from strings and strips whitespace.
12
- Returns 0.0 if conversion fails.
13
- """
14
- try:
15
- if isinstance(value, str):
16
- value = value.replace(',', '').strip()
17
- return float(value) if value else 0.0
18
- except (ValueError, TypeError):
19
- logger.debug(f"Could not clean value: {value}")
20
- return 0.0
21
-
22
- def to_lakhs(value: Union[float, int, str]) -> float:
23
- """
24
- Convert a numeric value to lakhs (divide by 100,000 and round to 2 decimals).
25
- Accepts int, float, or numeric string.
26
- """
27
- try:
28
- if isinstance(value, str):
29
- value = float(value.replace(',', '').strip())
30
- return round(float(value) / 100000, 2)
31
- except (ValueError, TypeError):
32
- logger.debug(f"Could not convert to lakhs: {value}")
33
- return 0.0
34
-
35
- def convert_note_json_to_lakhs(note_json: Any) -> Any:
36
- """
37
- Recursively convert all numeric values in a note JSON to lakhs.
38
- Returns the converted object.
39
- """
40
- def convert(obj: Any) -> Any:
41
- if isinstance(obj, dict):
42
- for k, v in obj.items():
43
- if isinstance(v, (int, float)):
44
- obj[k] = to_lakhs(v)
45
- elif isinstance(v, str):
46
- try:
47
- obj[k] = to_lakhs(float(v.replace(',', '')))
48
- except Exception:
49
- obj[k] = v
50
- else:
51
- obj[k] = convert(v)
52
- elif isinstance(obj, list):
53
- for i in range(len(obj)):
54
- obj[i] = convert(obj[i])
55
- return obj
56
-
57
- return convert(note_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notes/utils/utils_normalize.py DELETED
@@ -1,60 +0,0 @@
1
- import logging
2
- from typing import Any, Dict, List, Optional
3
- from pydantic import BaseModel, ValidationError
4
-
5
- # Configure logging
6
- logging.basicConfig(level=logging.INFO)
7
- logger = logging.getLogger(__name__)
8
-
9
- class NormalizedNote(BaseModel):
10
- note_number: Optional[str]
11
- note_title: Optional[str]
12
- full_title: Optional[str]
13
- table_data: List[Dict[str, Any]]
14
- breakdown: Dict[str, Any] = {}
15
- matched_accounts: List[Any] = []
16
- total_amount: Optional[float] = None
17
- total_amount_lakhs: Optional[float] = None
18
- matched_accounts_count: Optional[int] = None
19
- comparative_data: Dict[str, Any] = {}
20
- notes_and_disclosures: List[str] = []
21
- markdown_content: Optional[str] = ""
22
-
23
- def is_date_label(label: str) -> bool:
24
- """Check if a label is a date string."""
25
- import re
26
- return bool(re.match(r"^(March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}$", label)) \
27
- or bool(re.match(r"^\d{4}-\d{2}-\d{2}$", label))
28
-
29
- def normalize_llm_note_json(llm_json: Dict[str, Any]) -> Dict[str, Any]:
30
- """
31
- Normalize a single LLM-generated note JSON to standard format.
32
- Returns a dict compatible with NormalizedNote.
33
- """
34
- note_number = llm_json.get("note_number") or llm_json.get("metadata", {}).get("note_number", "")
35
- note_title = llm_json.get("note_title") or llm_json.get("title", "")
36
- full_title = llm_json.get("full_title") or (f"{note_number}. {note_title}" if note_number else note_title)
37
-
38
- table_data: List[Dict[str, Any]] = []
39
-
40
- if "structure" in llm_json and llm_json["structure"]:
41
- for item in llm_json["structure"]:
42
- if "subcategories" in item and item["subcategories"]:
43
- for sub in item["subcategories"]:
44
- label = sub.get("label", "")
45
- if not is_date_label(label):
46
- row = {
47
- "particulars": label,
48
- "current_year": sub.get("value", ""),
49
- "previous_year": sub.get("previous_value", "-"),
50
- }
51
- table_data.append(row)
52
- if "category" in item and ("total" in item or "previous_total" in item):
53
- row = {
54
- "particulars": f"Total {item.get('category', '')}",
55
- "current_year": item.get("total", ""),
56
- "previous_year": item.get("previous_total", "-"),
57
- }
58
- table_data.append(row)
59
-
60
- # Optionally, add a header row