CodeCommunity committed
Commit 2286df7 · verified · 1 Parent(s): 099f735

Create app/core/model_loader.py

Files changed (1)
  1. app/core/model_loader.py +242 -0

app/core/model_loader.py ADDED
@@ -0,0 +1,242 @@
+ # app/core/model_loader.py
+ import os
+ import json
+ import logging
+ import random
+ import time
+ from datetime import datetime
+ import google.generativeai as genai
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
+ from dotenv import load_dotenv
+
+ # Force load .env immediately upon module import
+ load_dotenv()
+
+ logger = logging.getLogger(__name__)
+
+ STATS_FILE = "usage_stats.json"
+
+ class LLMSingleton:
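+     """Singleton wrapper around the Gemini API.
+
+     Holds the shared model configuration, persists usage statistics to
+     STATS_FILE, and tracks the configured per-minute and daily quotas.
+     """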
+     _instance = None
+
+     @classmethod
+     def get_instance(cls):
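+         """Return the shared instance, creating it on first access."""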
+         if cls._instance is None:
+             cls._instance = cls()
+         return cls._instance
+
+     def __init__(self):
+         if self._instance is not None:
+             raise Exception("Singleton instance already exists!")
+
+         self.api_key = os.getenv("GEMINI_API_KEY")
+         if not self.api_key:
+             logger.warning("⚠️ GEMINI_API_KEY not found in environment variables. AI features will fail.")
+         else:
+             genai.configure(api_key=self.api_key)
+
+         # Stable "latest" alias for the Flash model family; the RPM and
+         # daily limits configured below assume the free tier for this model.
+         self.model_name = "gemini-flash-latest"
+         self.generation_config = {
+             "temperature": 0.3,
+             "top_p": 0.95,
+             "top_k": 64,
+             "max_output_tokens": 8192,
+             "response_mime_type": "application/json",
+         }
+         self.safety_settings = {
+             HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+             HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+             HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+             HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+         }
+
+         # Load persistent stats
+         self.stats = self._load_stats()
+         self._check_daily_reset()
+
+         # Quota tracking (free-tier limits)
+         self.rpm_limit = 15
+         self.daily_limit = 500
+         self.minute_window_start = time.time()
+         self.requests_this_minute = 0
+
+     def _load_stats(self):
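+         """Load persisted usage stats from disk, filling in defaults for
+         any fields missing from older files."""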
+         default_stats = {
+             "total_requests": 0,
+             "successful_requests": 0,
+             "rate_limit_hits": 0,
+             "input_tokens": 0,
+             "output_tokens": 0,
+             "errors": 0,
+             "local_model_requests": 0,
+             "date": datetime.now().strftime("%Y-%m-%d"),
+             "daily_requests_count": 0
+         }
+         if os.path.exists(STATS_FILE):
+             try:
+                 with open(STATS_FILE, "r") as f:
+                     data = json.load(f)
+                     # Merge defaults for backward compatibility
+                     return {**default_stats, **data}
+             except Exception as e:
+                 logger.error(f"Failed to load stats: {e}")
+         return default_stats
+
+     def _save_stats(self):
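+         """Persist stats to STATS_FILE; failures are logged, never raised."""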
+         try:
+             with open(STATS_FILE, "w") as f:
+                 json.dump(self.stats, f, indent=2)
+         except Exception as e:
+             logger.error(f"Failed to save stats: {e}")
+
+     def _check_daily_reset(self):
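+         """Reset the daily request counter when the calendar date changes."""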
+         today = datetime.now().strftime("%Y-%m-%d")
+         if self.stats.get("date") != today:
+             logger.info("📅 New day detected. Resetting daily AI quotas.")
+             self.stats["date"] = today
+             self.stats["daily_requests_count"] = 0
+             # 'total_requests' tracks lifetime usage and is deliberately
+             # not reset; only 'daily_requests_count' resets each day.
+             self._save_stats()
+
+     def _check_rpm_window(self):
+         """Reset the per-minute request counter once 60 seconds have passed."""
+         now = time.time()
+         if now - self.minute_window_start >= 60:
+             self.minute_window_start = now
+             self.requests_this_minute = 0
+
+     def get_usage_stats(self):
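+         """Return a snapshot of the stats plus the configured limits and
+         the remaining daily and per-minute quota."""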
+         self._check_daily_reset()
+         self._check_rpm_window()
+         stats = self.stats.copy()
+
+         stats["limits"] = {
+             "requests_per_minute": self.rpm_limit,
+             "requests_per_day": self.daily_limit
+         }
+         stats["remaining_daily_requests"] = max(0, self.daily_limit - stats["daily_requests_count"])
+         stats["remaining_rpm"] = max(0, self.rpm_limit - self.requests_this_minute)
+
+         return stats
+
+     def track_local_usage(self, input_chars: int = 0):
+         """Track usage of local models (like CodeBERT)."""
+         self.stats["local_model_requests"] += 1
+         # Rough token estimate: ~4 characters per token
+         self.stats["input_tokens"] += input_chars // 4
+         self._save_stats()
+
+     def generate(self, prompt: str, max_tokens: int = 8192) -> str:
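+         """Generate a JSON-mode response from Gemini.
+
+         Checks the daily quota before sending, tracks usage stats, and
+         retries with exponential backoff plus jitter on 429/quota errors.
+         Returns "" if the key is missing, the quota is exhausted, or all
+         retries fail.
+         """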
+         self._check_daily_reset()
+         self._check_rpm_window()
+
+         if not self.api_key:
+             logger.error("Cannot generate: Missing GEMINI_API_KEY")
+             return ""
+
+         # Check the daily quota before sending
+         if self.stats["daily_requests_count"] >= self.daily_limit:
+             logger.error(f"❌ Daily Quota Exceeded ({self.daily_limit} requests). Request blocked.")
+             return ""
+
+         # Track attempt
+         self.requests_this_minute += 1
+         self.stats["total_requests"] += 1
+         self.stats["daily_requests_count"] += 1
+         self._save_stats()
+
+         # Log the prompt start (first 50 chars) to help identify the caller
+         logger.info(f"🤖 Generating with Gemini. Prompt start: {prompt[:50]}...")
+
+         # Honor the caller's max_tokens without mutating the shared config
+         config = {**self.generation_config, "max_output_tokens": max_tokens}
+         model = genai.GenerativeModel(
+             model_name=self.model_name,
+             generation_config=config,
+             safety_settings=self.safety_settings
+         )
+
+         retries = 0
+         max_retries = 5
+         base_delay = 2
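+         # Backoff schedule (before jitter): 2s, 4s, 8s, 16s, 32s, 64s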
+
+         while retries <= max_retries:
+             try:
+                 # Estimate input tokens (rough approximation: 4 chars/token)
+                 self.stats["input_tokens"] += len(prompt) // 4
+
+                 response = model.generate_content(prompt)
+
+                 # Update stats, preferring exact token counts when the API returns them
+                 self.stats["successful_requests"] += 1
+                 if response.usage_metadata and response.usage_metadata.candidates_token_count:
+                     self.stats["output_tokens"] += response.usage_metadata.candidates_token_count
+                 elif response.text:
+                     self.stats["output_tokens"] += len(response.text) // 4
+
+                 self._save_stats()
+                 return response.text.strip()
+             except Exception as e:
+                 error_str = str(e)
+                 if "429" in error_str or "quota" in error_str.lower():
+                     self.stats["rate_limit_hits"] += 1
+                     self._save_stats()
+                     wait_time = (base_delay * (2 ** retries)) + random.uniform(0, 1)
+                     logger.warning(f"⚠️ Rate limit hit. Retrying in {wait_time:.2f}s... (Attempt {retries+1}/{max_retries})")
+                     time.sleep(wait_time)
+                     retries += 1
+                 else:
+                     self.stats["errors"] += 1
+                     self._save_stats()
+                     logger.error(f"Gemini generation failed: {e}")
+                     return ""
+
+         self.stats["errors"] += 1
+         self._save_stats()
+         logger.error("❌ Max retries reached. Request failed.")
+         return ""
+
+     def generate_text(self, prompt: str) -> str:
+         """Helper for non-JSON text generation (like Guides)."""
+         self._check_daily_reset()
+         self._check_rpm_window()
+
+         if not self.api_key:
+             return "Error: Missing API Key."
+
+         if self.stats["daily_requests_count"] >= self.daily_limit:
+             return "Error: Daily Quota Exceeded."
+
+         try:
+             self.requests_this_minute += 1
+             self.stats["total_requests"] += 1
+             self.stats["daily_requests_count"] += 1
+             self._save_stats()
+
+             # Override config to return plain text instead of JSON
+             config = self.generation_config.copy()
+             config["response_mime_type"] = "text/plain"
+
+             model = genai.GenerativeModel(
+                 model_name=self.model_name,
+                 generation_config=config,
+                 safety_settings=self.safety_settings
+             )
+             response = model.generate_content(prompt)
+             return response.text
+         except Exception as e:
+             logger.error(f"Gemini text generation failed: {e}")
+             return f"Error generating content: {str(e)}"
+
+ llm_engine = LLMSingleton.get_instance()
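+
+ # Example usage (a hypothetical caller elsewhere in the app):
+ #
+ #     from app.core.model_loader import llm_engine
+ #
+ #     raw_json = llm_engine.generate('Return {"status": "ok"} as JSON.')
+ #     guide = llm_engine.generate_text("Write a short setup guide.")
+ #     remaining = llm_engine.get_usage_stats()["remaining_daily_requests"]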