Spaces:

BinKhoaLe1812
/

EdSummariser

Sleeping

App Files Community

LiamKhoaLe committed on Sep 27

Commit

e6eaeb3

1 Parent(s): 3c913ab

Upd agent specs analytics

Browse files

Files changed (4) hide show

helpers/coder.py +16 -2
routes/chats.py +1 -1
routes/search.py +6 -6
utils/api/router.py +33 -5

helpers/coder.py CHANGED Viewed

@@ -149,7 +149,7 @@ async def generate_code_artifacts(
     )
     # Use the new NVIDIA coder function
-    code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
     code_md = (code_md or "").strip()
     # Track NVIDIA_CODER usage
@@ -178,11 +178,25 @@ async def generate_code_artifacts(
     return code_md
-async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator) -> str:
     """
     NVIDIA Coder completion using the specified coder model with streaming support.
     Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
     """
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"

     )
     # Use the new NVIDIA coder function
+    code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, "coding")
     code_md = (code_md or "").strip()
     # Track NVIDIA_CODER usage
     return code_md
+async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator, user_id: str = None, context: str = "") -> str:
     """
     NVIDIA Coder completion using the specified coder model with streaming support.
     Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
     """
+    # Track model usage for analytics
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker and user_id:
+            await tracker.track_model_usage(
+                user_id=user_id,
+                model_name="nvidia/coder-8b",
+                provider="nvidia_coder",
+                context=context or "nvidia_coder_completion",
+                metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
+            )
+    except Exception as e:
+        logger.debug(f"[CODER] Analytics tracking failed: {e}")
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"

routes/chats.py CHANGED Viewed

@@ -245,7 +245,7 @@ Return only the variations, one per line, no numbering or extra text."""
         # Use Qwen for better query variation generation reasoning
         from utils.api.router import qwen_chat_completion
-        response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         # Parse variations
         variations = [line.strip() for line in response.split('\n') if line.strip()]

         # Use Qwen for better query variation generation reasoning
         from utils.api.router import qwen_chat_completion
+        response = await qwen_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "chat_query_variations")
         # Parse variations
         variations = [line.strip() for line in response.split('\n') if line.strip()]

routes/search.py CHANGED Viewed

@@ -41,7 +41,7 @@ Return only the keywords, separated by spaces, no other text."""
             )
         # Use NVIDIA Large for better keyword extraction
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         keywords = [kw.strip() for kw in response.split() if kw.strip()]
         return keywords[:5] if keywords else [user_query]
@@ -77,7 +77,7 @@ Return as JSON array of objects."""
         user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
         # Use NVIDIA Large for better strategy generation
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         try:
             strategies = json.loads(response)
@@ -337,7 +337,7 @@ Return only the relevant content, no additional commentary."""
         user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
         # Use NVIDIA Large for better content extraction
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         return response.strip() if response.strip() else ""
@@ -362,7 +362,7 @@ Consider: accuracy, completeness, clarity, authority, recency, bias, factual cla
         user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
         # Use NVIDIA Large for better quality assessment
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         try:
             # Try to parse JSON response
@@ -426,7 +426,7 @@ Focus on factual claims, statistics, and verifiable information."""
         user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
         # Use NVIDIA Large for better cross-validation
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         try:
             validation = json.loads(response)
@@ -493,7 +493,7 @@ Be clear and direct."""
         user_prompt = f"Summarize this content:\n\n{content}"
         # Use NVIDIA Large for better summarization
-        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator)
         return response.strip() if response.strip() else content[:200] + "..."

             )
         # Use NVIDIA Large for better keyword extraction
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         keywords = [kw.strip() for kw in response.split() if kw.strip()]
         return keywords[:5] if keywords else [user_query]
         user_prompt = f"User query: {user_query}\n\nGenerate search strategies:"
         # Use NVIDIA Large for better strategy generation
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         try:
             strategies = json.loads(response)
         user_prompt = f"User Query: {user_query}\n\nWeb Content:\n{content}\n\nExtract relevant information:"
         # Use NVIDIA Large for better content extraction
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         return response.strip() if response.strip() else ""
         user_prompt = f"Assess this content quality:\n\n{content[:2000]}"
         # Use NVIDIA Large for better quality assessment
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         try:
             # Try to parse JSON response
         user_prompt = f"Main content:\n{content[:1000]}\n\nOther sources:\n{comparison_text[:2000]}\n\nAnalyze consistency:"
         # Use NVIDIA Large for better cross-validation
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         try:
             validation = json.loads(response)
         user_prompt = f"Summarize this content:\n\n{content}"
         # Use NVIDIA Large for better summarization
+        response = await nvidia_large_chat_completion(sys_prompt, user_prompt, nvidia_rotator, user_id, "search_keyword_extraction")
         return response.strip() if response.strip() else content[:200] + "..."

utils/api/router.py CHANGED Viewed

@@ -184,7 +184,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
     elif provider == "qwen":
         # Use Qwen for reasoning tasks with fallback
         try:
-            return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator)
         except Exception as e:
             logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
             # Fallback: Qwen → NVIDIA_SMALL
@@ -194,7 +194,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
     elif provider == "nvidia_large":
         # Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
         try:
-            return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator)
         except Exception as e:
             logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
             # Fallback: NVIDIA_LARGE → NVIDIA_SMALL
@@ -205,7 +205,7 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
         # Use NVIDIA Coder for code generation tasks with fallback
         try:
             from helpers.coder import nvidia_coder_completion
-            return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
         except Exception as e:
             logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
             # Fallback: NVIDIA_CODER → NVIDIA_SMALL
@@ -216,11 +216,25 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
     return "Unsupported provider."
-async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
     """
     Qwen chat completion with thinking mode enabled.
     Uses the NVIDIA API rotator for key management.
     """
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"
@@ -294,11 +308,25 @@ async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rota
         return "I couldn't process the request with Qwen model."
-async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator) -> str:
     """
     NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
     Uses the NVIDIA API rotator for key management.
     """
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"

     elif provider == "qwen":
         # Use Qwen for reasoning tasks with fallback
         try:
+            return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
         except Exception as e:
             logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
             # Fallback: Qwen → NVIDIA_SMALL
     elif provider == "nvidia_large":
         # Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
         try:
+            return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
         except Exception as e:
             logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
             # Fallback: NVIDIA_LARGE → NVIDIA_SMALL
         # Use NVIDIA Coder for code generation tasks with fallback
         try:
             from helpers.coder import nvidia_coder_completion
+            return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator, user_id, context)
         except Exception as e:
             logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
             # Fallback: NVIDIA_CODER → NVIDIA_SMALL
     return "Unsupported provider."
+async def qwen_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
     """
     Qwen chat completion with thinking mode enabled.
     Uses the NVIDIA API rotator for key management.
     """
+    # Track model usage for analytics
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker and user_id:
+            await tracker.track_model_usage(
+                user_id=user_id,
+                model_name="meta/llama-3.1-8b-instruct",
+                provider="nvidia",
+                context=context or "qwen_completion",
+                metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
+            )
+    except Exception as e:
+        logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"
         return "I couldn't process the request with Qwen model."
+async def nvidia_large_chat_completion(system_prompt: str, user_prompt: str, nvidia_rotator: APIKeyRotator, user_id: str = None, context: str = "") -> str:
     """
     NVIDIA Large (GPT-OSS) chat completion for hard/long context tasks.
     Uses the NVIDIA API rotator for key management.
     """
+    # Track model usage for analytics
+    try:
+        from utils.analytics import get_analytics_tracker
+        tracker = get_analytics_tracker()
+        if tracker and user_id:
+            await tracker.track_model_usage(
+                user_id=user_id,
+                model_name="openai/gpt-oss-120b",
+                provider="nvidia_large",
+                context=context or "nvidia_large_completion",
+                metadata={"system_prompt_length": len(system_prompt), "user_prompt_length": len(user_prompt)}
+            )
+    except Exception as e:
+        logger.debug(f"[ROUTER] Analytics tracking failed: {e}")
     key = nvidia_rotator.get_key() or ""
     url = "https://integrate.api.nvidia.com/v1/chat/completions"