Spaces:
Sleeping
Sleeping
Commit
·
e6eaeb3
1
Parent(s):
3c913ab
Update agent specs analytics
Browse files
- helpers/coder.py +16 -2
- routes/chats.py +1 -1
- routes/search.py +6 -6
- utils/api/router.py +33 -5
helpers/coder.py
CHANGED
|
@@ -149,7 +149,7 @@ async def generate_code_artifacts(
|
|
| 149 |
)
|
| 150 |
|
| 151 |
# Use the new NVIDIA coder function
|
| 152 |
-
code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
|
| 153 |
code_md = (code_md or "").strip()
|
| 154 |
|
| 155 |
# Track NVIDIA_CODER usage
|
|
@@ -178,11 +178,25 @@ async def generate_code_artifacts(
|
|
| 178 |
return code_md
|
| 179 |
|
| 180 |
|
| 181 |
-
async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator) -> str:
|
| 182 |
"""
|
| 183 |
NVIDIA Coder completion using the specified coder model with streaming support.
|
| 184 |
Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
|
| 185 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
key = nvidia_rotator.get_key() or ""
|
| 187 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 188 |
|
|
|
|
| 149 |
)
|
| 150 |
|
| 151 |
# Use the new NVIDIA coder function
|
| 152 |
+
code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, "coding")
|
| 153 |
code_md = (code_md or "").strip()
|
| 154 |
|
| 155 |
# Track NVIDIA_CODER usage
|
|
|
|
| 178 |
return code_md
|
| 179 |
|
| 180 |
|
| 181 |
+
async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator, user_id: str = None, context: str = "") -> str:
|
| 182 |
"""
|
| 183 |
NVIDIA Coder completion using the specified coder model with streaming support.
|
| 184 |
Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
|
| 185 |
"""
|
| 186 |
+
# Track model usage for analytics
|
| 187 |
+
try:
|
| 188 |
+
from utils.analytics import get_analytics_tracker
|
| 189 |
+
tracker = get_analytics_tracker()
|
| 190 |
+
if tracker and user_id:
|
| 191 |
+
await tracker.track_model_usage(
|
| 192 |
+
user_id=user_id,
|
| 193 |
+
model_name="nvidia/coder-8b",
|
| 194 |
+
provider="nvidia_coder",
|
| 195 |
+
context=context or "nvidia_coder_completion",
|
| 196 |
+
metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
|
| 197 |
+
)
|
| 198 |
+
except Exception as e:
|
| 199 |
+
logger.debug(f"[CODER] Analytics tracking failed: {e}")
|
| 200 |
key = nvidia_rotator.get_key() or ""
|
| 201 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 202 |
|
routes/chats.py
CHANGED
|
@@ -245,7 +245,7 @@ Return only the variations, one per line, no numbering or extra text."""
|
|
| 245 |
|
| 246 |
# Use Qwen for better query variation generation reasoning
|
| 247 |
from utils.api.router import qwen_chat_completion
|
| 248 |
-
response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 249 |
|
| 250 |
# Parse variations
|
| 251 |
variations = [line.strip() for line in response.split('\n') if line.strip()]
|
|
|
|
| 245 |
|
| 246 |
# Use Qwen for better query variation generation reasoning
|
| 247 |
from utils.api.router import qwen_chat_completion
|
| 248 |
+
response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "chat_query_variations")
|
| 249 |
|
| 250 |
# Parse variations
|
| 251 |
variations = [line.strip() for line in response.split('\n') if line.strip()]
|
routes/search.py
CHANGED
|
@@ -41,7 +41,7 @@ Return only the keywords, separated by spaces, no other text."""
|
|
| 41 |
)
|
| 42 |
|
| 43 |
# Use NVIDIA Large for better keyword extraction
|
| 44 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 45 |
|
| 46 |
keywords = [kw.strip() for kw in response.split() if kw.strip()]
|
| 47 |
return keywords[:5] if keywords else [user_query]
|
|
@@ -77,7 +77,7 @@ Return as JSON array of objects."""
|
|
| 77 |
user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
|
| 78 |
|
| 79 |
# Use NVIDIA Large for better strategy generation
|
| 80 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 81 |
|
| 82 |
try:
|
| 83 |
strategies = json.loads(response)
|
|
@@ -337,7 +337,7 @@ Return only the relevant content, no additional commentary."""
|
|
| 337 |
user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
|
| 338 |
|
| 339 |
# Use NVIDIA Large for better content extraction
|
| 340 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 341 |
|
| 342 |
return response.strip() if response.strip() else ""
|
| 343 |
|
|
@@ -362,7 +362,7 @@ Consider: accuracy, completeness, clarity, authority, recency, bias, factual cla
|
|
| 362 |
user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
|
| 363 |
|
| 364 |
# Use NVIDIA Large for better quality assessment
|
| 365 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 366 |
|
| 367 |
try:
|
| 368 |
# Try to parse JSON response
|
|
@@ -426,7 +426,7 @@ Focus on factual claims, statistics, and verifiable information."""
|
|
| 426 |
user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
|
| 427 |
|
| 428 |
# Use NVIDIA Large for better cross-validation
|
| 429 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 430 |
|
| 431 |
try:
|
| 432 |
validation = json.loads(response)
|
|
@@ -493,7 +493,7 @@ Be clear and direct."""
|
|
| 493 |
user_prompt = f"Summarize this content:\n\n{content}"
|
| 494 |
|
| 495 |
# Use NVIDIA Large for better summarization
|
| 496 |
-
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
|
| 497 |
|
| 498 |
return response.strip() if response.strip() else content[:200] + "..."
|
| 499 |
|
|
|
|
| 41 |
)
|
| 42 |
|
| 43 |
# Use NVIDIA Large for better keyword extraction
|
| 44 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 45 |
|
| 46 |
keywords = [kw.strip() for kw in response.split() if kw.strip()]
|
| 47 |
return keywords[:5] if keywords else [user_query]
|
|
|
|
| 77 |
user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
|
| 78 |
|
| 79 |
# Use NVIDIA Large for better strategy generation
|
| 80 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 81 |
|
| 82 |
try:
|
| 83 |
strategies = json.loads(response)
|
|
|
|
| 337 |
user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
|
| 338 |
|
| 339 |
# Use NVIDIA Large for better content extraction
|
| 340 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 341 |
|
| 342 |
return response.strip() if response.strip() else ""
|
| 343 |
|
|
|
|
| 362 |
user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
|
| 363 |
|
| 364 |
# Use NVIDIA Large for better quality assessment
|
| 365 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 366 |
|
| 367 |
try:
|
| 368 |
# Try to parse JSON response
|
|
|
|
| 426 |
user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
|
| 427 |
|
| 428 |
# Use NVIDIA Large for better cross-validation
|
| 429 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 430 |
|
| 431 |
try:
|
| 432 |
validation = json.loads(response)
|
|
|
|
| 493 |
user_prompt = f"Summarize this content:\n\n{content}"
|
| 494 |
|
| 495 |
# Use NVIDIA Large for better summarization
|
| 496 |
+
response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
|
| 497 |
|
| 498 |
return response.strip() if response.strip() else content[:200] + "..."
|
| 499 |
|
utils/api/router.py
CHANGED
|
@@ -184,7 +184,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
|
|
| 184 |
elif provider == "qwen":
|
| 185 |
# Use Qwen for reasoning tasks with fallback
|
| 186 |
try:
|
| 187 |
-
return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator)
|
| 188 |
except Exception as e:
|
| 189 |
logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
|
| 190 |
# Fallback: Qwen → NVIDIA_SMALL
|
|
@@ -194,7 +194,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
|
|
| 194 |
elif provider == "nvidia_large":
|
| 195 |
# Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
|
| 196 |
try:
|
| 197 |
-
return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator)
|
| 198 |
except Exception as e:
|
| 199 |
logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
|
| 200 |
# Fallback: NVIDIA_LARGE → NVIDIA_SMALL
|
|
@@ -205,7 +205,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
|
|
| 205 |
# Use NVIDIA Coder for code generation tasks with fallback
|
| 206 |
try:
|
| 207 |
from helpers.coder import nvidia_coder_completion
|
| 208 |
-
return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
|
| 209 |
except Exception as e:
|
| 210 |
logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
|
| 211 |
# Fallback: NVIDIA_CODER → NVIDIA_SMALL
|
|
@@ -216,11 +216,25 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
|
|
| 216 |
return "Unsupported provider."
|
| 217 |
|
| 218 |
|
| 219 |
-
async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
|
| 220 |
"""
|
| 221 |
Qwen chat completion with thinking mode enabled.
|
| 222 |
Uses the NVIDIA API rotator for key management.
|
| 223 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
key = nvidia_rotator.get_key() or ""
|
| 225 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 226 |
|
|
@@ -294,11 +308,25 @@ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rota
|
|
| 294 |
return "I couldn't process the request with Qwen model."
|
| 295 |
|
| 296 |
|
| 297 |
-
async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
|
| 298 |
"""
|
| 299 |
NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
|
| 300 |
Uses the NVIDIA API rotator for key management.
|
| 301 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
key = nvidia_rotator.get_key() or ""
|
| 303 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 304 |
|
|
|
|
| 184 |
elif provider == "qwen":
|
| 185 |
# Use Qwen for reasoning tasks with fallback
|
| 186 |
try:
|
| 187 |
+
return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
|
| 188 |
except Exception as e:
|
| 189 |
logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
|
| 190 |
# Fallback: Qwen → NVIDIA_SMALL
|
|
|
|
| 194 |
elif provider == "nvidia_large":
|
| 195 |
# Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
|
| 196 |
try:
|
| 197 |
+
return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
|
| 198 |
except Exception as e:
|
| 199 |
logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
|
| 200 |
# Fallback: NVIDIA_LARGE → NVIDIA_SMALL
|
|
|
|
| 205 |
# Use NVIDIA Coder for code generation tasks with fallback
|
| 206 |
try:
|
| 207 |
from helpers.coder import nvidia_coder_completion
|
| 208 |
+
return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
|
| 209 |
except Exception as e:
|
| 210 |
logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
|
| 211 |
# Fallback: NVIDIA_CODER → NVIDIA_SMALL
|
|
|
|
| 216 |
return "Unsupported provider."
|
| 217 |
|
| 218 |
|
| 219 |
+
async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
|
| 220 |
"""
|
| 221 |
Qwen chat completion with thinking mode enabled.
|
| 222 |
Uses the NVIDIA API rotator for key management.
|
| 223 |
"""
|
| 224 |
+
# Track model usage for analytics
|
| 225 |
+
try:
|
| 226 |
+
from utils.analytics import get_analytics_tracker
|
| 227 |
+
tracker = get_analytics_tracker()
|
| 228 |
+
if tracker and user_id:
|
| 229 |
+
await tracker.track_model_usage(
|
| 230 |
+
user_id=user_id,
|
| 231 |
+
model_name="meta/llama-3.1-8b-instruct",
|
| 232 |
+
provider="nvidia",
|
| 233 |
+
context=context or "qwen_completion",
|
| 234 |
+
metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
|
| 235 |
+
)
|
| 236 |
+
except Exception as e:
|
| 237 |
+
logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
|
| 238 |
key = nvidia_rotator.get_key() or ""
|
| 239 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 240 |
|
|
|
|
| 308 |
return "I couldn't process the request with Qwen model."
|
| 309 |
|
| 310 |
|
| 311 |
+
async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
|
| 312 |
"""
|
| 313 |
NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
|
| 314 |
Uses the NVIDIA API rotator for key management.
|
| 315 |
"""
|
| 316 |
+
# Track model usage for analytics
|
| 317 |
+
try:
|
| 318 |
+
from utils.analytics import get_analytics_tracker
|
| 319 |
+
tracker = get_analytics_tracker()
|
| 320 |
+
if tracker and user_id:
|
| 321 |
+
await tracker.track_model_usage(
|
| 322 |
+
user_id=user_id,
|
| 323 |
+
model_name="openai/gpt-oss-120b",
|
| 324 |
+
provider="nvidia_large",
|
| 325 |
+
context=context or "nvidia_large_completion",
|
| 326 |
+
metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
|
| 327 |
+
)
|
| 328 |
+
except Exception as e:
|
| 329 |
+
logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
|
| 330 |
key = nvidia_rotator.get_key() or ""
|
| 331 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 332 |
|