VibecoderMcSwaggins committed on
Commit
4bfa475
·
1 Parent(s): a5ad664

fix(coderabbit): Address all CodeRabbit review findings


Critical fixes:
- HuggingFaceProvider now requires HF_TOKEN (raises clear RuntimeError if missing)
- Fixed test that claimed "no keys" while still setting hf_token

Minor fixes:
- service_loader.py: sk-ant- keys now properly excluded from the OpenAI path (prefix rule sketched below)
- llamaindex_rag.py: Added defense-in-depth key prefix validation
- Removed all Anthropic mentions from user-facing messages (4 places)
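
For context, a minimal sketch of the prefix rule that the service_loader.py and llamaindex_rag.py fixes both rely on. `classify_key` is a hypothetical helper written for illustration only; it is not part of this commit.

```python
# Hypothetical helper (illustration only, not code from this commit).
# Anthropic keys look like "sk-ant-...", so they also satisfy
# startswith("sk-"); the longer prefix must be tested first.
def classify_key(api_key: str | None) -> str:
    if not api_key:
        return "none"
    if api_key.startswith("sk-ant-"):
        return "anthropic"  # not usable for OpenAI embeddings
    if api_key.startswith("sk-"):
        return "openai"
    return "unknown"


assert classify_key("sk-ant-example") == "anthropic"
assert classify_key("sk-example") == "openai"
```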

src/agent_factory/judges.py CHANGED
@@ -2,6 +2,7 @@
 
 import asyncio
 import json
+import os
 from functools import partial
 from typing import Any, ClassVar
 
@@ -93,14 +94,21 @@ def get_model(api_key: str | None = None) -> Any:
     # Priority 3: HuggingFace (free fallback)
     # Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
     model_name = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
-    hf_token = settings.hf_token
+
+    # Try settings.hf_token first, then fall back to HF_TOKEN env var
+    # HuggingFaceProvider requires a token - it won't work without one
+    hf_token = settings.hf_token or os.environ.get("HF_TOKEN")
     if hf_token:
         hf_provider = HuggingFaceProvider(api_key=hf_token)
         return HuggingFaceModel(model_name, provider=hf_provider)
 
-    # HuggingFace without token (public models only)
-    hf_provider = HuggingFaceProvider()
-    return HuggingFaceModel(model_name, provider=hf_provider)
+    # No HF token available - raise clear error
+    raise RuntimeError(
+        "No LLM API key available. Either:\n"
+        "  1. Set OPENAI_API_KEY for premium tier, or\n"
+        "  2. Set HF_TOKEN for free HuggingFace tier\n"
+        "Get a free HF token at: https://huggingface.co/settings/tokens"
+    )
 
 
 class JudgeHandler:
@@ -519,7 +527,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
                 "The HuggingFace Inference API free tier limit has been reached. "
                 "The search results listed below were retrieved but could not be "
                 "analyzed by the AI. "
-                "Please try again later, or add an OpenAI/Anthropic API key above "
+                "Please try again later, or add an OpenAI API key above "
                 "for unlimited access."
             ),
         )
@@ -555,7 +563,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
                 f"Search found {len(evidence)} sources (listed below) but they could not "
                 "be analyzed by AI.\n\n"
                 "**Options:**\n"
-                "- Add an OpenAI or Anthropic API key for reliable analysis\n"
+                "- Add an OpenAI API key for reliable analysis\n"
                 "- Try again later when HF Inference is available\n"
                 "- Review the raw search results below"
             ),
@@ -584,7 +592,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
                 f"{question} clinical trials",
                 f"{question} drug candidates",
             ],
-            reasoning=f"HF Inference failed: {error}. Recommend configuring OpenAI/Anthropic key.",
+            reasoning=f"HF Inference failed: {error}. Recommend configuring OpenAI API key.",
         )
 
     async def synthesize(self, system_prompt: str, user_prompt: str) -> str:
@@ -741,6 +749,6 @@ class MockJudgeHandler:
             reasoning=(
                 f"Demo mode assessment based on {evidence_count} real search results. "
                 "For AI-powered analysis with drug candidate identification and "
-                "evidence synthesis, configure OPENAI_API_KEY or ANTHROPIC_API_KEY."
+                "evidence synthesis, configure OPENAI_API_KEY."
             ),
         )
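
A hedged usage sketch of the new fail-fast behaviour: with neither OPENAI_API_KEY nor HF_TOKEN configured, get_model() now raises instead of silently returning a token-less HuggingFace provider. The import path below is an assumption based on the file location, not confirmed by the diff.

```python
# Sketch only: import path assumed from src/agent_factory/judges.py.
from agent_factory.judges import get_model

try:
    model = get_model()
except RuntimeError as exc:
    # Raised when no OPENAI_API_KEY and no HF_TOKEN are available.
    print(f"LLM unavailable: {exc}")
```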
src/services/llamaindex_rag.py CHANGED
@@ -90,6 +90,19 @@ class LlamaIndexRAGService:
         if not self.api_key:
             raise ConfigurationError("OPENAI_API_KEY required for LlamaIndex RAG service")
 
+        # Defense-in-depth: Validate key prefix to prevent cryptic auth errors
+        # Note: Anthropic keys start with sk-ant-, which would pass startswith("sk-")
+        if self.api_key.startswith("sk-ant-"):
+            raise ConfigurationError(
+                "Anthropic keys (sk-ant-...) are not supported for embeddings. "
+                "LlamaIndex RAG requires an OpenAI API key (sk-...)."
+            )
+        if not self.api_key.startswith("sk-"):
+            raise ConfigurationError(
+                f"Invalid API key format. Expected OpenAI key starting with 'sk-', "
+                f"got key starting with '{self.api_key[:8]}...'."
+            )
+
         # Configure LlamaIndex settings (use centralized config)
         self._Settings.llm = OpenAI(
             model=settings.openai_model,
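
A possible regression test for this validation, sketched under assumptions: that LlamaIndexRAGService accepts an api_key argument and that ConfigurationError is importable as shown; neither is confirmed by this diff.

```python
# Hypothetical test sketch; constructor signature and import paths are assumed.
import pytest

from services.llamaindex_rag import LlamaIndexRAGService
from utils.errors import ConfigurationError  # assumed module path


def test_rejects_anthropic_key_for_embeddings():
    with pytest.raises(ConfigurationError, match="not supported for embeddings"):
        LlamaIndexRAGService(api_key="sk-ant-dummy")
```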
src/utils/service_loader.py CHANGED
@@ -66,9 +66,15 @@ def get_embedding_service(api_key: str | None = None) -> "EmbeddingServiceProtoc
         ImportError: If no embedding service dependencies are available
     """
     # Determine if we have a valid OpenAI key (BYOK or Env)
+    # Note: Must check sk-ant- BEFORE sk- since Anthropic keys start with sk-ant-
     has_openai = False
-    if api_key and api_key.startswith("sk-"):
-        has_openai = True
+    if api_key:
+        if api_key.startswith("sk-ant-"):
+            # Anthropic key - not supported for embeddings
+            logger.warning("Anthropic keys don't support embeddings, falling back to free tier")
+        elif api_key.startswith("sk-"):
+            # OpenAI BYOK
+            has_openai = True
     elif settings.has_openai_key:
         has_openai = True
 
tests/unit/agent_factory/test_get_model_auto_detect.py CHANGED
@@ -48,15 +48,21 @@ class TestGetModelAutoDetect:
         model = get_model()
         assert isinstance(model, HuggingFaceModel)
 
-    def test_falls_through_to_huggingface_when_no_keys(self, monkeypatch):
-        """No keys at all → Falls through to HuggingFace (free tier)."""
+    def test_raises_when_no_api_keys_available(self, monkeypatch):
+        """No keys at all → RuntimeError with helpful message."""
         monkeypatch.setattr(settings, "openai_api_key", None)
-        monkeypatch.setattr(settings, "hf_token", "hf_test_token")
+        monkeypatch.setattr(settings, "hf_token", None)
         monkeypatch.setattr(settings, "huggingface_model", "Qwen/Qwen2.5-7B-Instruct")
+        # Also ensure HF_TOKEN env var is not set
+        monkeypatch.delenv("HF_TOKEN", raising=False)
 
-        # Should return HuggingFace model (free tier)
-        model = get_model()
-        assert isinstance(model, HuggingFaceModel)
+        # Should raise clear error when no tokens available
+        import pytest
+
+        with pytest.raises(RuntimeError) as exc_info:
+            get_model()
+        assert "No LLM API key available" in str(exc_info.value)
+        assert "HF_TOKEN" in str(exc_info.value)
 
     def test_openai_env_takes_priority_over_huggingface(self, monkeypatch):
         """OpenAI env key present → OpenAI wins over HuggingFace."""