LiamKhoaLe commited on
Commit
e6eaeb3
·
1 Parent(s): 3c913ab

Update agent specs analytics

Browse files
Files changed (4) hide show
  1. helpers/coder.py +16 -2
  2. routes/chats.py +1 -1
  3. routes/search.py +6 -6
  4. utils/api/router.py +33 -5
helpers/coder.py CHANGED
@@ -149,7 +149,7 @@ async def generate_code_artifacts(
149
  )
150
 
151
  # Use the new NVIDIA coder function
152
- code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
153
  code_md = (code_md or "").strip()
154
 
155
  # Track NVIDIA_CODER usage
@@ -178,11 +178,25 @@ async def generate_code_artifacts(
178
  return code_md
179
 
180
 
181
- async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator) -> str:
182
  """
183
  NVIDIA Coder completion using the specified coder model with streaming support.
184
  Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
185
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  key = nvidia_rotator.get_key() or ""
187
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
188
 
 
149
  )
150
 
151
  # Use the new NVIDIA coder function
152
+ code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, "coding")
153
  code_md = (code_md or "").strip()
154
 
155
  # Track NVIDIA_CODER usage
 
178
  return code_md
179
 
180
 
181
+ async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator, user_id: str = None, context: str = "") -> str:
182
  """
183
  NVIDIA Coder completion using the specified coder model with streaming support.
184
  Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
185
  """
186
+ # Track model usage for analytics
187
+ try:
188
+ from utils.analytics import get_analytics_tracker
189
+ tracker = get_analytics_tracker()
190
+ if tracker and user_id:
191
+ await tracker.track_model_usage(
192
+ user_id=user_id,
193
+ model_name="nvidia/coder-8b",
194
+ provider="nvidia_coder",
195
+ context=context or "nvidia_coder_completion",
196
+ metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
197
+ )
198
+ except Exception as e:
199
+ logger.debug(f"[CODER] Analytics tracking failed: {e}")
200
  key = nvidia_rotator.get_key() or ""
201
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
202
 
routes/chats.py CHANGED
@@ -245,7 +245,7 @@ Return only the variations, one per line, no numbering or extra text."""
245
 
246
  # Use Qwen for better query variation generation reasoning
247
  from utils.api.router import qwen_chat_completion
248
- response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
249
 
250
  # Parse variations
251
  variations = [line.strip() for line in response.split('\n') if line.strip()]
 
245
 
246
  # Use Qwen for better query variation generation reasoning
247
  from utils.api.router import qwen_chat_completion
248
+ response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "chat_query_variations")
249
 
250
  # Parse variations
251
  variations = [line.strip() for line in response.split('\n') if line.strip()]
routes/search.py CHANGED
@@ -41,7 +41,7 @@ Return only the keywords, separated by spaces, no other text."""
41
  )
42
 
43
  # Use NVIDIA Large for better keyword extraction
44
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
45
 
46
  keywords = [kw.strip() for kw in response.split() if kw.strip()]
47
  return keywords[:5] if keywords else [user_query]
@@ -77,7 +77,7 @@ Return as JSON array of objects."""
77
  user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
78
 
79
  # Use NVIDIA Large for better strategy generation
80
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
81
 
82
  try:
83
  strategies = json.loads(response)
@@ -337,7 +337,7 @@ Return only the relevant content, no additional commentary."""
337
  user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
338
 
339
  # Use NVIDIA Large for better content extraction
340
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
341
 
342
  return response.strip() if response.strip() else ""
343
 
@@ -362,7 +362,7 @@ Consider: accuracy, completeness, clarity, authority, recency, bias, factual cla
362
  user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
363
 
364
  # Use NVIDIA Large for better quality assessment
365
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
366
 
367
  try:
368
  # Try to parse JSON response
@@ -426,7 +426,7 @@ Focus on factual claims, statistics, and verifiable information."""
426
  user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
427
 
428
  # Use NVIDIA Large for better cross-validation
429
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
430
 
431
  try:
432
  validation = json.loads(response)
@@ -493,7 +493,7 @@ Be clear and direct."""
493
  user_prompt = f"Summarize this content:\n\n{content}"
494
 
495
  # Use NVIDIA Large for better summarization
496
- response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
497
 
498
  return response.strip() if response.strip() else content[:200] + "..."
499
 
 
41
  )
42
 
43
  # Use NVIDIA Large for better keyword extraction
44
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
45
 
46
  keywords = [kw.strip() for kw in response.split() if kw.strip()]
47
  return keywords[:5] if keywords else [user_query]
 
77
  user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
78
 
79
  # Use NVIDIA Large for better strategy generation
80
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_strategy_generation")
81
 
82
  try:
83
  strategies = json.loads(response)
 
337
  user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
338
 
339
  # Use NVIDIA Large for better content extraction
340
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_content_extraction")
341
 
342
  return response.strip() if response.strip() else ""
343
 
 
362
  user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
363
 
364
  # Use NVIDIA Large for better quality assessment
365
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_quality_assessment")
366
 
367
  try:
368
  # Try to parse JSON response
 
426
  user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
427
 
428
  # Use NVIDIA Large for better cross-validation
429
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_cross_validation")
430
 
431
  try:
432
  validation = json.loads(response)
 
493
  user_prompt = f"Summarize this content:\n\n{content}"
494
 
495
  # Use NVIDIA Large for better summarization
496
+ response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_summarization")
497
 
498
  return response.strip() if response.strip() else content[:200] + "..."
499
 
utils/api/router.py CHANGED
@@ -184,7 +184,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
184
  elif provider == "qwen":
185
  # Use Qwen for reasoning tasks with fallback
186
  try:
187
- return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator)
188
  except Exception as e:
189
  logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
190
  # Fallback: Qwen → NVIDIA_SMALL
@@ -194,7 +194,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
194
  elif provider == "nvidia_large":
195
  # Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
196
  try:
197
- return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator)
198
  except Exception as e:
199
  logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
200
  # Fallback: NVIDIA_LARGE → NVIDIA_SMALL
@@ -205,7 +205,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
205
  # Use NVIDIA Coder for code generation tasks with fallback
206
  try:
207
  from helpers.coder import nvidia_coder_completion
208
- return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
209
  except Exception as e:
210
  logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
211
  # Fallback: NVIDIA_CODER → NVIDIA_SMALL
@@ -216,11 +216,25 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
216
  return "Unsupported provider."
217
 
218
 
219
- async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
220
  """
221
  Qwen chat completion with thinking mode enabled.
222
  Uses the NVIDIA API rotator for key management.
223
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  key = nvidia_rotator.get_key() or ""
225
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
226
 
@@ -294,11 +308,25 @@ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rota
294
  return "I couldn't process the request with Qwen model."
295
 
296
 
297
- async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
298
  """
299
  NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
300
  Uses the NVIDIA API rotator for key management.
301
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  key = nvidia_rotator.get_key() or ""
303
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
304
 
 
184
  elif provider == "qwen":
185
  # Use Qwen for reasoning tasks with fallback
186
  try:
187
+ return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
188
  except Exception as e:
189
  logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
190
  # Fallback: Qwen → NVIDIA_SMALL
 
194
  elif provider == "nvidia_large":
195
  # Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
196
  try:
197
+ return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
198
  except Exception as e:
199
  logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
200
  # Fallback: NVIDIA_LARGE → NVIDIA_SMALL
 
205
  # Use NVIDIA Coder for code generation tasks with fallback
206
  try:
207
  from helpers.coder import nvidia_coder_completion
208
+ return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
209
  except Exception as e:
210
  logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
211
  # Fallback: NVIDIA_CODER → NVIDIA_SMALL
 
216
  return "Unsupported provider."
217
 
218
 
219
+ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
220
  """
221
  Qwen chat completion with thinking mode enabled.
222
  Uses the NVIDIA API rotator for key management.
223
  """
224
+ # Track model usage for analytics
225
+ try:
226
+ from utils.analytics import get_analytics_tracker
227
+ tracker = get_analytics_tracker()
228
+ if tracker and user_id:
229
+ await tracker.track_model_usage(
230
+ user_id=user_id,
231
+ model_name="meta/llama-3.1-8b-instruct",
232
+ provider="nvidia",
233
+ context=context or "qwen_completion",
234
+ metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
235
+ )
236
+ except Exception as e:
237
+ logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
238
  key = nvidia_rotator.get_key() or ""
239
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
240
 
 
308
  return "I couldn't process the request with Qwen model."
309
 
310
 
311
+ async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
312
  """
313
  NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
314
  Uses the NVIDIA API rotator for key management.
315
  """
316
+ # Track model usage for analytics
317
+ try:
318
+ from utils.analytics import get_analytics_tracker
319
+ tracker = get_analytics_tracker()
320
+ if tracker and user_id:
321
+ await tracker.track_model_usage(
322
+ user_id=user_id,
323
+ model_name="openai/gpt-oss-120b",
324
+ provider="nvidia_large",
325
+ context=context or "nvidia_large_completion",
326
+ metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
327
+ )
328
+ except Exception as e:
329
+ logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
330
  key = nvidia_rotator.get_key() or ""
331
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
332