LiamKhoaLe committed on
Commit 7a1ebee
1 Parent(s): a72fec7

Upd NVIDIA ana

helpers/coder.py CHANGED
@@ -97,7 +97,7 @@ async def generate_code_artifacts(
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="nvidia_large",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context="code_analysis",
             metadata={"subsection_id": subsection_id}
@@ -157,7 +157,7 @@ async def generate_code_artifacts(
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name=NVIDIA_CODER,
+            model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
             provider="nvidia_coder",
             context="report_coding",
             metadata={"subsection_id": subsection_id}
@@ -190,7 +190,7 @@ async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_r
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="nvidia/coder-8b",
+            model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
             provider="nvidia_coder",
             context=context or "nvidia_coder_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
helpers/diagram.py CHANGED
@@ -187,7 +187,9 @@ Please provide the corrected Mermaid code that will render successfully."""
 
     # Use NVIDIA_LARGE for better error correction
     selection = {"provider": "nvidia_large", "model": os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, None, user_id, "diagram_fix")
+    # Import rotators from setup
+    from helpers.setup import gemini_rotator, nvidia_rotator
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id, "diagram_fix")
 
     if response:
         # Clean up the response
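
The broken call passed None for both rotator arguments, leaving the NVIDIA-backed request with no API key source; the fix imports the shared rotators lazily inside the function, which also keeps helpers/diagram.py from importing helpers/setup at module load time, a common way to sidestep circular imports. A sketch of the provider-to-rotator dispatch this implies; this is an assumption about how generate_answer_with_model behaves, not code from the repo:

# Hypothetical guard illustrating why None rotators fail (assumption, not repo code).
def pick_rotator(selection: dict, gemini_rotator, nvidia_rotator):
    provider = selection.get("provider", "")
    rotator = gemini_rotator if provider.startswith("gemini") else nvidia_rotator
    if rotator is None:
        raise ValueError(f"No API key rotator for provider {provider!r}")
    return rotator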
legacy.py CHANGED
@@ -791,7 +791,7 @@ Return only the variations, one per line, no numbering or extra text."""
 
     from utils.api.router import generate_answer_with_model
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator, user_id="system", context="legacy_analysis")
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id="system", context="legacy_analysis")
 
     # Parse variations
     variations = [line.strip() for line in response.split('\n') if line.strip()]
memo/consolidation.py CHANGED
@@ -217,6 +217,21 @@ Create a single consolidated memory:"""
         metadata={"count": len(contents)}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="consolidate",
+                context="memory_consolidation",
+                metadata={"memories_count": len(memories)}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better memory consolidation reasoning
     from utils.api.router import qwen_chat_completion
     consolidated_content = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "memory_consolidation")
memo/nvidia.py CHANGED
@@ -29,7 +29,7 @@ async def nvidia_chat(system_prompt: str, user_prompt: str, nvidia_key: str, rot
     if tracker:
         await tracker.track_model_usage(
            user_id=user_id,
-            model_name=NVIDIA_SMALL,
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context=context,
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
@@ -59,6 +59,21 @@ async def qwen_chat(system_prompt: str, user_prompt: str, rotator, user_id: str
     """
     Qwen chat call for medium complexity tasks with thinking mode.
     """
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="chat",
+                context="memo_qwen_chat",
+                metadata={"query": user_prompt[:100]}
+            )
+    except Exception:
+        pass
+
     try:
         return await qwen_chat_completion(system_prompt, user_prompt, rotator, user_id, "memo_qwen_chat")
     except Exception as e:
memo/plan/execution.py CHANGED
@@ -388,6 +388,21 @@ Select the most relevant Q&A memories:"""
         metadata={"question": question[:100], "memories_count": len(memories)}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="select",
+                context="memory_selection",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better memory selection reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "memory_selection")
memo/plan/intent.py CHANGED
@@ -148,6 +148,21 @@ Respond with only the intent name (e.g., "ENHANCEMENT")."""
         metadata={"question": question[:100]}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="intent",
+                context="intent_detection",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better intent detection reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "intent_detection")
memo/retrieval.py CHANGED
@@ -259,6 +259,21 @@ Should this question be enhanced with context?"""
         metadata={"question": question[:100]}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="enhancement_decision",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better context enhancement reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "enhancement_decision")
@@ -343,6 +358,21 @@ Create an enhanced version that incorporates this context naturally."""
         metadata={"question": question[:100]}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="question_enhancement",
+                metadata={"query": question}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better question enhancement reasoning
     from utils.api.router import qwen_chat_completion
     enhanced_question = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "question_enhancement")
@@ -420,6 +450,21 @@ Create an enhanced version that incorporates this context naturally."""
         metadata={"instructions": instructions[:100]}
     )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="instruction_enhancement",
+                metadata={"instructions": instructions}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better instruction enhancement reasoning
     from utils.api.router import qwen_chat_completion
     enhanced_instructions = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "instruction_enhancement")
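
The same guarded tracking block now appears five times across the memo modules (consolidation, nvidia, plan/execution, plan/intent, and three call sites in retrieval above). A sketch of a shared helper that would deduplicate it; track_memo_action is hypothetical, while the tracker calls inside mirror the hunks above:

from typing import Any, Dict, Optional

async def track_memo_action(user_id: str, action: str, context: str,
                            metadata: Optional[Dict[str, Any]] = None) -> None:
    """Best-effort memo analytics: tracking failures must never break the request."""
    try:
        from utils.analytics import get_analytics_tracker
        tracker = get_analytics_tracker()
        if tracker:
            await tracker.track_agent_usage(
                user_id=user_id,
                agent_name="memo",
                action=action,
                context=context,
                metadata=metadata or {},
            )
    except Exception:
        pass  # analytics is deliberately non-critical

Each call site would then shrink to one line, e.g. await track_memo_action(user_id, "intent", "intent_detection", {"query": query}).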
routes/reports.py CHANGED
@@ -1272,7 +1272,7 @@ Return the renumbered headings in the format: "level: new_number: heading_text"
 
     # Use NVIDIA model for heading re-numbering
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator, user_id, "report_heading_fix")
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id, "report_heading_fix")
 
     # Parse the AI response
     renumbered_headings = []
utils/api/router.py CHANGED
@@ -228,7 +228,7 @@ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rota
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="meta/llama-3.1-8b-instruct",
+            model_name=os.getenv("NVIDIA_MEDIUM", "qwen/qwen3-next-80b-a3b-thinking"),
             provider="nvidia",
             context=context or "qwen_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
@@ -320,7 +320,7 @@ async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvi
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="openai/gpt-oss-120b",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context=context or "nvidia_large_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
utils/service/pdf.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
 from typing import List, Dict
 from fastapi import HTTPException
 from utils.logger import get_logger
+from helpers.setup import gemini_rotator, nvidia_rotator
 
 logger = get_logger("PDF", __name__)
 
@@ -691,7 +692,7 @@ Return only the formatted references, one per line, numbered sequentially."""
     user_prompt = f"Format these sources in IEEE style:\n\n{source_data}"
 
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator, user_id="system", context="pdf_citation")
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id="system", context="pdf_citation")
 
     # Parse the response into individual references
     references = [line.strip() for line in response.split('\n') if line.strip() and line.strip().startswith('[')]
utils/service/summarizer.py CHANGED
@@ -4,11 +4,12 @@ from typing import List
 from utils.logger import get_logger
 from utils.api.rotator import robust_post_json, APIKeyRotator
 from utils.api.router import qwen_chat_completion, nvidia_large_chat_completion
+from helpers.setup import nvidia_rotator
 
 logger = get_logger("SUM", __name__)
 
-# Create a module-level NVIDIA API key rotator (uses NVIDIA_API_1..N)
-ROTATOR = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)
+# Use the shared NVIDIA API key rotator from helpers.setup
+ROTATOR = nvidia_rotator
 
 
 async def llama_chat(messages, temperature: float = 0.2, user_id: str = "system", context: str = "llama_chat") -> str:
@@ -85,7 +86,7 @@ async def nvidia_large_summarize(text: str, max_sentences: int = 3) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="openai/gpt-oss-120b",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context="summarization",
             metadata={"text_length": len(text)}
@@ -156,7 +157,7 @@ async def clean_chunk_text(text: str) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="meta/llama-3.1-8b-instruct",
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context="content_cleaning",
             metadata={"text_length": len(text)}
@@ -187,7 +188,7 @@ async def qwen_summarize(text: str, max_sentences: int = 3) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="meta/llama-3.1-8b-instruct",
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context="qwen_summarization",
             metadata={"text_length": len(text)}