Commit 7a1ebee · Parent(s): a72fec7
Upd NVIDIA ana
Files changed:
- helpers/coder.py +3 -3
- helpers/diagram.py +3 -1
- legacy.py +1 -1
- memo/consolidation.py +15 -0
- memo/nvidia.py +16 -1
- memo/plan/execution.py +15 -0
- memo/plan/intent.py +15 -0
- memo/retrieval.py +45 -0
- routes/reports.py +1 -1
- utils/api/router.py +2 -2
- utils/service/pdf.py +2 -1
- utils/service/summarizer.py +6 -5
helpers/coder.py
CHANGED
@@ -97,7 +97,7 @@ async def generate_code_artifacts(
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="openai/gpt-oss-120b",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context="code_analysis",
             metadata={"subsection_id": subsection_id}
@@ -157,7 +157,7 @@ async def generate_code_artifacts(
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name=NVIDIA_CODER,
+            model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
             provider="nvidia_coder",
             context="report_coding",
             metadata={"subsection_id": subsection_id}
@@ -190,7 +190,7 @@ async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_r
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="qwen/qwen3-coder-480b-a35b-instruct",
+            model_name=os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct"),
             provider="nvidia_coder",
             context=context or "nvidia_coder_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
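All three hunks make the same change: a model ID that was hardcoded or pulled from a module constant is now resolved from the environment, with the old ID kept as the fallback. A minimal sketch of the pattern, using only variable names and defaults that appear in this commit:

import os

# Model tiers resolved at call time; each default is the ID that was
# previously hardcoded, so behavior is unchanged when the variable is unset.
NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
NVIDIA_MEDIUM = os.getenv("NVIDIA_MEDIUM", "qwen/qwen3-next-80b-a3b-thinking")
NVIDIA_LARGE = os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b")
NVIDIA_CODER = os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct")

Because the lookup happens inside each call rather than once at import, an override of, say, NVIDIA_CODER takes effect without reloading the module.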
helpers/diagram.py
CHANGED
@@ -187,7 +187,9 @@ Please provide the corrected Mermaid code that will render successfully."""
 
     # Use NVIDIA_LARGE for better error correction
     selection = {"provider": "nvidia_large", "model": os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b")}
-
+    # Import rotators from setup
+    from helpers.setup import gemini_rotator, nvidia_rotator
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id, "diagram_fix")
 
     if response:
         # Clean up the response
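Note the rotators are imported inside the function rather than at the top of helpers/diagram.py. The diff does not state why; the usual motive is breaking an import cycle with helpers.setup, which is an assumption here, as is the wrapper name in this sketch:

async def fix_diagram(selection: dict, sys_prompt: str, user_prompt: str, user_id: str) -> str:
    # Late imports: helpers.setup and the router are only loaded when the
    # fixer actually runs, so a startup-time import of this module by
    # helpers.setup cannot form a cycle.
    from helpers.setup import gemini_rotator, nvidia_rotator
    from utils.api.router import generate_answer_with_model
    return await generate_answer_with_model(
        selection, sys_prompt, user_prompt,
        gemini_rotator, nvidia_rotator, user_id, "diagram_fix",
    )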
legacy.py
CHANGED
@@ -791,7 +791,7 @@ Return only the variations, one per line, no numbering or extra text."""
 
     from utils.api.router import generate_answer_with_model
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt,
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id="system", context="legacy_analysis")
 
     # Parse variations
     variations = [line.strip() for line in response.split('\n') if line.strip()]
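This is the first of the call sites (routes/reports.py and utils/service/pdf.py follow) where a generate_answer_with_model call that previously broke off after user_prompt now passes both rotators plus attribution. The shape below is inferred from the call sites alone, not from the router's definition, so treat it as an assumption:

from typing import Optional

# Inferred signature of utils.api.router.generate_answer_with_model,
# reconstructed from the four call sites touched by this commit.
async def generate_answer_with_model(
    selection: dict,                # {"provider": ..., "model": ...}
    system_prompt: str,
    user_prompt: str,
    gemini_rotator=None,            # API-key rotators, now passed everywhere
    nvidia_rotator=None,
    user_id: str = "system",        # analytics attribution
    context: Optional[str] = None,  # e.g. "legacy_analysis", "pdf_citation"
) -> str:
    ...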
memo/consolidation.py
CHANGED
@@ -217,6 +217,21 @@ Create a single consolidated memory:"""
                 metadata={"count": len(contents)}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="consolidate",
+                context="memory_consolidation",
+                metadata={"memories_count": len(memories)}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better memory consolidation reasoning
     from utils.api.router import qwen_chat_completion
     consolidated_content = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "memory_consolidation")
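The identical fifteen-line tracking block is inlined again in memo/nvidia.py, memo/plan/execution.py, memo/plan/intent.py, and three times in memo/retrieval.py, varying only in action, context, and metadata. A sketch of the shared shape as a single helper (a hypothetical factoring; the commit inlines the block each time):

async def track_memo_action(user_id: str, action: str, context: str, metadata: dict) -> None:
    # Best-effort telemetry: a missing or failing tracker must never break
    # the memo code path, so every error is swallowed.
    try:
        from utils.analytics import get_analytics_tracker
        tracker = get_analytics_tracker()
        if tracker:
            await tracker.track_agent_usage(
                user_id=user_id,
                agent_name="memo",
                action=action,
                context=context,
                metadata=metadata,
            )
    except Exception:
        pass

The broad except Exception: pass reads as deliberate: analytics is a side channel, and an unreachable tracker should degrade silently rather than fail the memo path.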
memo/nvidia.py
CHANGED
@@ -29,7 +29,7 @@ async def nvidia_chat(system_prompt: str, user_prompt: str, nvidia_key: str, rot
     if tracker:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name=NVIDIA_SMALL,
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context=context,
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
@@ -59,6 +59,21 @@ async def qwen_chat(system_prompt: str, user_prompt: str, rotator, user_id: str
     """
     Qwen chat call for medium complexity tasks with thinking mode.
     """
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="chat",
+                context="memo_qwen_chat",
+                metadata={"query": user_prompt[:100]}
+            )
+    except Exception:
+        pass
+
     try:
         return await qwen_chat_completion(system_prompt, user_prompt, rotator, user_id, "memo_qwen_chat")
     except Exception as e:
memo/plan/execution.py
CHANGED
@@ -388,6 +388,21 @@ Select the most relevant Q&A memories:"""
                 metadata={"question": question[:100], "memories_count": len(memories)}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="select",
+                context="memory_selection",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better memory selection reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "memory_selection")
memo/plan/intent.py
CHANGED
@@ -148,6 +148,21 @@ Respond with only the intent name (e.g., "ENHANCEMENT")."""
                 metadata={"question": question[:100]}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="intent",
+                context="intent_detection",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better intent detection reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "intent_detection")
memo/retrieval.py
CHANGED
@@ -259,6 +259,21 @@ Should this question be enhanced with context?"""
                 metadata={"question": question[:100]}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="enhancement_decision",
+                metadata={"query": query}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better context enhancement reasoning
     from utils.api.router import qwen_chat_completion
     response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "enhancement_decision")
@@ -343,6 +358,21 @@ Create an enhanced version that incorporates this context naturally."""
                 metadata={"question": question[:100]}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="question_enhancement",
+                metadata={"query": question}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better question enhancement reasoning
     from utils.api.router import qwen_chat_completion
     enhanced_question = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "question_enhancement")
@@ -420,6 +450,21 @@ Create an enhanced version that incorporates this context naturally."""
                 metadata={"instructions": instructions[:100]}
             )
 
+    # Track memo agent usage
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker:
+            await tracker.track_agent_usage(
+                user_id=user_id,
+                agent_name="memo",
+                action="enhance",
+                context="instruction_enhancement",
+                metadata={"instructions": instructions}
+            )
+    except Exception:
+        pass
+
     # Use Qwen for better instruction enhancement reasoning
     from utils.api.router import qwen_chat_completion
     enhanced_instructions = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "instruction_enhancement")
routes/reports.py
CHANGED
@@ -1272,7 +1272,7 @@ Return the renumbered headings in the format: "level: new_number: heading_text"
 
     # Use NVIDIA model for heading re-numbering
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt,
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id, "report_heading_fix")
 
     # Parse the AI response
     renumbered_headings = []
utils/api/router.py
CHANGED
@@ -228,7 +228,7 @@ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rota
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="qwen/qwen3-next-80b-a3b-thinking",
+            model_name=os.getenv("NVIDIA_MEDIUM", "qwen/qwen3-next-80b-a3b-thinking"),
             provider="nvidia",
             context=context or "qwen_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
@@ -320,7 +320,7 @@ async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvi
     if tracker and user_id:
         await tracker.track_model_usage(
             user_id=user_id,
-            model_name="openai/gpt-oss-120b",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context=context or "nvidia_large_completion",
             metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
utils/service/pdf.py
CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
 from typing import List, Dict
 from fastapi import HTTPException
 from utils.logger import get_logger
+from helpers.setup import gemini_rotator, nvidia_rotator
 
 logger = get_logger("PDF", __name__)
 
@@ -691,7 +692,7 @@ Return only the formatted references, one per line, numbered sequentially."""
     user_prompt = f"Format these sources in IEEE style:\n\n{source_data}"
 
     selection = {"provider": "nvidia", "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")}
-    response = await generate_answer_with_model(selection, sys_prompt, user_prompt,
+    response = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id="system", context="pdf_citation")
 
     # Parse the response into individual references
     references = [line.strip() for line in response.split('\n') if line.strip() and line.strip().startswith('[')]
utils/service/summarizer.py
CHANGED
@@ -4,11 +4,12 @@ from typing import List
 from utils.logger import get_logger
 from utils.api.rotator import robust_post_json, APIKeyRotator
 from utils.api.router import qwen_chat_completion, nvidia_large_chat_completion
+from helpers.setup import nvidia_rotator
 
 logger = get_logger("SUM", __name__)
 
-#
-ROTATOR =
+# Use the shared NVIDIA API key rotator from helpers.setup
+ROTATOR = nvidia_rotator
 
 
 async def llama_chat(messages, temperature: float = 0.2, user_id: str = "system", context: str = "llama_chat") -> str:
@@ -85,7 +86,7 @@ async def nvidia_large_summarize(text: str, max_sentences: int = 3) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="openai/gpt-oss-120b",
+            model_name=os.getenv("NVIDIA_LARGE", "openai/gpt-oss-120b"),
             provider="nvidia_large",
             context="summarization",
             metadata={"text_length": len(text)}
@@ -156,7 +157,7 @@ async def clean_chunk_text(text: str) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="meta/llama-3.1-8b-instruct",
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context="content_cleaning",
             metadata={"text_length": len(text)}
@@ -187,7 +188,7 @@ async def qwen_summarize(text: str, max_sentences: int = 3) -> str:
     if tracker:
         await tracker.track_model_usage(
             user_id="system",
-            model_name="meta/llama-3.1-8b-instruct",
+            model_name=os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
             provider="nvidia",
             context="qwen_summarization",
             metadata={"text_length": len(text)}