Spaces:

DataQuests
/

DeepCritical

Running

App Files Files Community

VibecoderMcSwaggins committed 14 days ago

Commit

17bd211

unverified ·

2 Parent(s): cd004e1 1bc9785

Merge pull request #15 from The-Obstacle-Is-The-Way/feat/phase10-clinicaltrials

Browse files

Files changed (9) hide show

docs/implementation/10_phase_clinicaltrials.md +79 -98
examples/search_demo/run_search.py +6 -3
pyproject.toml +10 -13
src/app.py +4 -3
src/tools/clinicaltrials.py +129 -0
src/tools/search_handler.py +5 -5
src/utils/models.py +5 -2
tests/unit/tools/test_clinicaltrials.py +138 -0
uv.lock +2 -0

docs/implementation/10_phase_clinicaltrials.md CHANGED Viewed

@@ -115,12 +115,28 @@ Evidence(
 ## 4. Implementation
 ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
 ```python
 """ClinicalTrials.gov search tool using API v2."""
-import httpx
 from tenacity import retry, stop_after_attempt, wait_exponential
 from src.utils.exceptions import SearchError
@@ -128,10 +144,14 @@ from src.utils.models import Citation, Evidence
 class ClinicalTrialsTool:
-    """Search tool for ClinicalTrials.gov."""
     BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
-    FIELDS = [
         "NCTId",
         "BriefTitle",
         "Phase",
@@ -152,34 +172,33 @@ class ClinicalTrialsTool:
         reraise=True,
     )
     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
-        """
-        Search ClinicalTrials.gov for studies.
-        Args:
-            query: Search query (e.g., "metformin alzheimer")
-            max_results: Maximum results to return
-        Returns:
-            List of Evidence objects from clinical trials
-        """
         params = {
             "query.term": query,
             "pageSize": min(max_results, 100),
             "fields": "|".join(self.FIELDS),
         }
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            try:
-                response = await client.get(self.BASE_URL, params=params)
-                response.raise_for_status()
-            except httpx.HTTPStatusError as e:
-                raise SearchError(f"ClinicalTrials.gov search failed: {e}") from e
             data = response.json()
             studies = data.get("studies", [])
             return [self._study_to_evidence(study) for study in studies[:max_results]]
     def _study_to_evidence(self, study: dict) -> Evidence:
         """Convert a clinical trial study to Evidence."""
         # Navigate nested structure
@@ -240,19 +259,23 @@ class ClinicalTrialsTool:
 ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
 ```python
 """Unit tests for ClinicalTrials.gov tool."""
 import pytest
-import respx
-from httpx import Response
 from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.utils.models import Evidence
 @pytest.fixture
-def mock_clinicaltrials_response():
     """Mock ClinicalTrials.gov API response."""
     return {
         "studies": [
@@ -260,26 +283,20 @@ def mock_clinicaltrials_response():
                 "protocolSection": {
                     "identificationModule": {
                         "nctId": "NCT04098666",
-                        "briefTitle": "Metformin in Alzheimer's Dementia Prevention"
                     },
                     "statusModule": {
                         "overallStatus": "Recruiting",
-                        "startDateStruct": {"date": "2020-01-15"}
                     },
                     "descriptionModule": {
                         "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
                     },
-                    "designModule": {
-                        "phases": ["PHASE2"]
-                    },
-                    "conditionsModule": {
-                        "conditions": ["Alzheimer Disease", "Dementia"]
-                    },
                     "armsInterventionsModule": {
-                        "interventions": [
-                            {"name": "Metformin", "type": "Drug"}
-                        ]
-                    }
                 }
             }
         ]
@@ -289,81 +306,45 @@ def mock_clinicaltrials_response():
 class TestClinicalTrialsTool:
     """Tests for ClinicalTrialsTool."""
-    def test_tool_name(self):
         """Tool should have correct name."""
         tool = ClinicalTrialsTool()
         assert tool.name == "clinicaltrials"
     @pytest.mark.asyncio
-    @respx.mock
-    async def test_search_returns_evidence(self, mock_clinicaltrials_response):
         """Search should return Evidence objects."""
-        respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
-            return_value=Response(200, json=mock_clinicaltrials_response)
-        )
-        tool = ClinicalTrialsTool()
-        results = await tool.search("metformin alzheimer", max_results=5)
-        assert len(results) == 1
-        assert isinstance(results[0], Evidence)
-        assert results[0].citation.source == "clinicaltrials"
-        assert "NCT04098666" in results[0].citation.url
-        assert "Metformin" in results[0].citation.title
     @pytest.mark.asyncio
-    @respx.mock
-    async def test_search_extracts_phase(self, mock_clinicaltrials_response):
-        """Search should extract trial phase."""
-        respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
-            return_value=Response(200, json=mock_clinicaltrials_response)
-        )
-        tool = ClinicalTrialsTool()
-        results = await tool.search("metformin alzheimer")
-        assert "PHASE2" in results[0].content
-    @pytest.mark.asyncio
-    @respx.mock
-    async def test_search_extracts_status(self, mock_clinicaltrials_response):
-        """Search should extract trial status."""
-        respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
-            return_value=Response(200, json=mock_clinicaltrials_response)
-        )
-        tool = ClinicalTrialsTool()
-        results = await tool.search("metformin alzheimer")
-        assert "Recruiting" in results[0].content
-    @pytest.mark.asyncio
-    @respx.mock
-    async def test_search_empty_results(self):
-        """Search should handle empty results gracefully."""
-        respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
-            return_value=Response(200, json={"studies": []})
-        )
-        tool = ClinicalTrialsTool()
-        results = await tool.search("nonexistent query xyz")
-        assert results == []
-    @pytest.mark.asyncio
-    @respx.mock
-    async def test_search_api_error(self):
         """Search should raise SearchError on API failure."""
-        from src.utils.exceptions import SearchError
-        respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
-            return_value=Response(500, text="Internal Server Error")
-        )
-        tool = ClinicalTrialsTool()
-        with pytest.raises(SearchError):
-            await tool.search("metformin alzheimer")
 class TestClinicalTrialsIntegration:
@@ -371,7 +352,7 @@ class TestClinicalTrialsIntegration:
     @pytest.mark.integration
     @pytest.mark.asyncio
-    async def test_real_api_call(self):
         """Test actual API call (requires network)."""
         tool = ClinicalTrialsTool()
         results = await tool.search("metformin diabetes", max_results=3)

 ## 4. Implementation
+### 4.0 Important: HTTP Client Selection
+**ClinicalTrials.gov's WAF blocks `httpx`'s TLS fingerprint.** Use `requests` instead.
+| Library | Status | Notes |
+|---------|--------|-------|
+| `httpx` | ❌ 403 Blocked | TLS/JA3 fingerprint flagged |
+| `httpx[http2]` | ❌ 403 Blocked | HTTP/2 doesn't help |
+| `requests` | ✅ Works | Industry standard, not blocked |
+| `urllib` | ✅ Works | Stdlib alternative |
+We use `requests` wrapped in `asyncio.to_thread()` for async compatibility.
 ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
 ```python
 """ClinicalTrials.gov search tool using API v2."""
+import asyncio
+from typing import Any, ClassVar
+import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
 from src.utils.exceptions import SearchError
 class ClinicalTrialsTool:
+    """Search tool for ClinicalTrials.gov.
+    Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
+    WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
+    """
     BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
+    FIELDS: ClassVar[list[str]] = [
         "NCTId",
         "BriefTitle",
         "Phase",
         reraise=True,
     )
     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+        """Search ClinicalTrials.gov for studies."""
         params = {
             "query.term": query,
             "pageSize": min(max_results, 100),
             "fields": "|".join(self.FIELDS),
         }
+        try:
+            # Run blocking requests.get in a separate thread for async compatibility
+            response = await asyncio.to_thread(
+                requests.get,
+                self.BASE_URL,
+                params=params,
+                headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
+                timeout=30,
+            )
+            response.raise_for_status()
             data = response.json()
             studies = data.get("studies", [])
             return [self._study_to_evidence(study) for study in studies[:max_results]]
+        except requests.HTTPError as e:
+            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
+        except requests.RequestException as e:
+            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
     def _study_to_evidence(self, study: dict) -> Evidence:
         """Convert a clinical trial study to Evidence."""
         # Navigate nested structure
 ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
+Uses `unittest.mock.patch` to mock `requests.get` (not `respx` since we're not using `httpx`).
 ```python
 """Unit tests for ClinicalTrials.gov tool."""
+from unittest.mock import MagicMock, patch
 import pytest
+import requests
 from src.tools.clinicaltrials import ClinicalTrialsTool
+from src.utils.exceptions import SearchError
 from src.utils.models import Evidence
 @pytest.fixture
+def mock_clinicaltrials_response() -> dict:
     """Mock ClinicalTrials.gov API response."""
     return {
         "studies": [
                 "protocolSection": {
                     "identificationModule": {
                         "nctId": "NCT04098666",
+                        "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
                     },
                     "statusModule": {
                         "overallStatus": "Recruiting",
+                        "startDateStruct": {"date": "2020-01-15"},
                     },
                     "descriptionModule": {
                         "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
                     },
+                    "designModule": {"phases": ["PHASE2"]},
+                    "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
                     "armsInterventionsModule": {
+                        "interventions": [{"name": "Metformin", "type": "Drug"}]
+                    },
                 }
             }
         ]
 class TestClinicalTrialsTool:
     """Tests for ClinicalTrialsTool."""
+    def test_tool_name(self) -> None:
         """Tool should have correct name."""
         tool = ClinicalTrialsTool()
         assert tool.name == "clinicaltrials"
     @pytest.mark.asyncio
+    async def test_search_returns_evidence(
+        self, mock_clinicaltrials_response: dict
+    ) -> None:
         """Search should return Evidence objects."""
+        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
+            mock_response = MagicMock()
+            mock_response.json.return_value = mock_clinicaltrials_response
+            mock_response.raise_for_status = MagicMock()
+            mock_get.return_value = mock_response
+            tool = ClinicalTrialsTool()
+            results = await tool.search("metformin alzheimer", max_results=5)
+            assert len(results) == 1
+            assert isinstance(results[0], Evidence)
+            assert results[0].citation.source == "clinicaltrials"
+            assert "NCT04098666" in results[0].citation.url
+            assert "Metformin" in results[0].citation.title
     @pytest.mark.asyncio
+    async def test_search_api_error(self) -> None:
         """Search should raise SearchError on API failure."""
+        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
+            mock_response = MagicMock()
+            mock_response.raise_for_status.side_effect = requests.HTTPError(
+                "500 Server Error"
+            )
+            mock_get.return_value = mock_response
+            tool = ClinicalTrialsTool()
+            with pytest.raises(SearchError):
+                await tool.search("metformin alzheimer")
 class TestClinicalTrialsIntegration:
     @pytest.mark.integration
     @pytest.mark.asyncio
+    async def test_real_api_call(self) -> None:
         """Test actual API call (requires network)."""
         tool = ClinicalTrialsTool()
         results = await tool.search("metformin diabetes", max_results=3)

examples/search_demo/run_search.py CHANGED Viewed

@@ -2,8 +2,9 @@
 """
 Demo: Search for drug repurposing evidence.
-This script demonstrates Phase 2 functionality:
 - PubMed search (biomedical literature)
 - SearchHandler (parallel scatter-gather orchestration)
 Usage:
@@ -20,6 +21,7 @@ Requirements:
 import asyncio
 import sys
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
@@ -33,10 +35,11 @@ async def main(query: str) -> None:
     # Initialize tools
     pubmed = PubMedTool()
-    handler = SearchHandler(tools=[pubmed], timeout=30.0)
     # Execute search
-    print("Searching PubMed in parallel...")
     result = await handler.execute(query, max_results_per_tool=5)
     # Display results

 """
 Demo: Search for drug repurposing evidence.
+This script demonstrates multi-source search functionality:
 - PubMed search (biomedical literature)
+- ClinicalTrials.gov search (clinical trial evidence)
 - SearchHandler (parallel scatter-gather orchestration)
 Usage:
 import asyncio
 import sys
+from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
     # Initialize tools
     pubmed = PubMedTool()
+    trials = ClinicalTrialsTool()
+    handler = SearchHandler(tools=[pubmed, trials], timeout=30.0)
     # Execute search
+    print("Searching PubMed and ClinicalTrials.gov in parallel...")
     result = await handler.execute(query, max_results_per_tool=5)
     # Display results

pyproject.toml CHANGED Viewed

@@ -7,25 +7,22 @@ requires-python = ">=3.11"
 dependencies = [
     # Core
     "pydantic>=2.7",
-    "pydantic-settings>=2.2",      # For BaseSettings (config)
-    "pydantic-ai>=0.0.16",          # Agent framework
     # AI Providers
     "openai>=1.0.0",
     "anthropic>=0.18.0",
     # HTTP & Parsing
-    "httpx>=0.27",                   # Async HTTP client
-    "beautifulsoup4>=4.12",          # HTML parsing
-    "xmltodict>=0.13",               # PubMed XML -> dict
     # UI
-    "gradio>=5.0",                   # Chat interface
     # Utils
-    "python-dotenv>=1.0",            # .env loading
-    "tenacity>=8.2",                 # Retry logic
-    "structlog>=24.1",               # Structured logging
 ]
 [project.optional-dependencies]

 dependencies = [
     # Core
     "pydantic>=2.7",
+    "pydantic-settings>=2.2", # For BaseSettings (config)
+    "pydantic-ai>=0.0.16", # Agent framework
     # AI Providers
     "openai>=1.0.0",
     "anthropic>=0.18.0",
     # HTTP & Parsing
+    "httpx>=0.27", # Async HTTP client (PubMed)
+    "beautifulsoup4>=4.12", # HTML parsing
+    "xmltodict>=0.13", # PubMed XML -> dict
     # UI
+    "gradio>=5.0", # Chat interface
     # Utils
+    "python-dotenv>=1.0", # .env loading
+    "tenacity>=8.2", # Retry logic
+    "structlog>=24.1", # Structured logging
+    "requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
 ]
 [project.optional-dependencies]

src/app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import gradio as gr
 from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
 from src.orchestrator_factory import create_orchestrator
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
 from src.utils.models import OrchestratorConfig
@@ -32,7 +33,7 @@ def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
     # Create search tools
     search_handler = SearchHandler(
-        tools=[PubMedTool()],
         timeout=config.search_timeout,
     )
@@ -127,7 +128,7 @@ def create_demo() -> Any:
         ## AI-Powered Drug Repurposing Research Agent
         Ask questions about potential drug repurposing opportunities.
-        The agent will search PubMed, evaluate evidence, and provide recommendations.
         **Example questions:**
         - "What drugs could be repurposed for Alzheimer's disease?"
@@ -160,7 +161,7 @@ def create_demo() -> Any:
         **Note**: This is a research tool and should not be used for medical decisions.
         Always consult healthcare professionals for medical advice.
-        Built with 🤖 PydanticAI + 🔬 PubMed
         """)
     return demo

 from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
 from src.orchestrator_factory import create_orchestrator
+from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
 from src.utils.models import OrchestratorConfig
     # Create search tools
     search_handler = SearchHandler(
+        tools=[PubMedTool(), ClinicalTrialsTool()],
         timeout=config.search_timeout,
     )
         ## AI-Powered Drug Repurposing Research Agent
         Ask questions about potential drug repurposing opportunities.
+        The agent searches PubMed & ClinicalTrials.gov to provide recommendations.
         **Example questions:**
         - "What drugs could be repurposed for Alzheimer's disease?"
         **Note**: This is a research tool and should not be used for medical decisions.
         Always consult healthcare professionals for medical advice.
+        Built with 🤖 PydanticAI + 🔬 PubMed & ClinicalTrials.gov
         """)
     return demo

src/tools/clinicaltrials.py ADDED Viewed

	@@ -0,0 +1,129 @@

+"""ClinicalTrials.gov search tool using API v2."""
+import asyncio
+from typing import Any, ClassVar
+import requests
+from tenacity import retry, stop_after_attempt, wait_exponential
+from src.utils.exceptions import SearchError
+from src.utils.models import Citation, Evidence
+class ClinicalTrialsTool:
+    """Search tool for ClinicalTrials.gov.
+    Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
+    WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
+    See: https://clinicaltrials.gov/data-api/api
+    """
+    BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
+    FIELDS: ClassVar[list[str]] = [
+        "NCTId",
+        "BriefTitle",
+        "Phase",
+        "OverallStatus",
+        "Condition",
+        "InterventionName",
+        "StartDate",
+        "BriefSummary",
+    ]
+    @property
+    def name(self) -> str:
+        return "clinicaltrials"
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=1, max=10),
+        reraise=True,
+    )
+    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
+        """Search ClinicalTrials.gov for studies.
+        Args:
+            query: Search query (e.g., "metformin alzheimer")
+            max_results: Maximum results to return (max 100)
+        Returns:
+            List of Evidence objects from clinical trials
+        """
+        params: dict[str, str | int] = {
+            "query.term": query,
+            "pageSize": min(max_results, 100),
+            "fields": "|".join(self.FIELDS),
+        }
+        try:
+            # Run blocking requests.get in a separate thread for async compatibility
+            response = await asyncio.to_thread(
+                requests.get,
+                self.BASE_URL,
+                params=params,
+                headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
+                timeout=30,
+            )
+            response.raise_for_status()
+            data = response.json()
+            studies = data.get("studies", [])
+            return [self._study_to_evidence(study) for study in studies[:max_results]]
+        except requests.HTTPError as e:
+            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
+        except requests.RequestException as e:
+            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
+    def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
+        """Convert a clinical trial study to Evidence."""
+        # Navigate nested structure
+        protocol = study.get("protocolSection", {})
+        id_module = protocol.get("identificationModule", {})
+        status_module = protocol.get("statusModule", {})
+        desc_module = protocol.get("descriptionModule", {})
+        design_module = protocol.get("designModule", {})
+        conditions_module = protocol.get("conditionsModule", {})
+        arms_module = protocol.get("armsInterventionsModule", {})
+        nct_id = id_module.get("nctId", "Unknown")
+        title = id_module.get("briefTitle", "Untitled Study")
+        status = status_module.get("overallStatus", "Unknown")
+        start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")
+        # Get phase (might be a list)
+        phases = design_module.get("phases", [])
+        phase = phases[0] if phases else "Not Applicable"
+        # Get conditions
+        conditions = conditions_module.get("conditions", [])
+        conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"
+        # Get interventions
+        interventions = arms_module.get("interventions", [])
+        intervention_names = [i.get("name", "") for i in interventions[:3]]
+        interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"
+        # Get summary
+        summary = desc_module.get("briefSummary", "No summary available.")
+        # Build content with key trial info
+        content = (
+            f"{summary[:500]}... "
+            f"Trial Phase: {phase}. "
+            f"Status: {status}. "
+            f"Conditions: {conditions_str}. "
+            f"Interventions: {interventions_str}."
+        )
+        return Evidence(
+            content=content[:2000],
+            citation=Citation(
+                source="clinicaltrials",
+                title=title[:500],
+                url=f"https://clinicaltrials.gov/study/{nct_id}",
+                date=start_date,
+                authors=[],  # Trials don't have traditional authors
+            ),
+            relevance=0.85,  # Trials are highly relevant for repurposing
+        )

src/tools/search_handler.py CHANGED Viewed

@@ -1,13 +1,13 @@
 """Search handler - orchestrates multiple search tools."""
 import asyncio
-from typing import Literal, cast
 import structlog
 from src.tools.base import SearchTool
 from src.utils.exceptions import SearchError
-from src.utils.models import Evidence, SearchResult
 logger = structlog.get_logger()
@@ -49,7 +49,7 @@ class SearchHandler:
         # Process results
         all_evidence: list[Evidence] = []
-        sources_searched: list[Literal["pubmed"]] = []
         errors: list[str] = []
         for tool, result in zip(self.tools, results, strict=True):
@@ -61,8 +61,8 @@ class SearchHandler:
                 success_result = cast(list[Evidence], result)
                 all_evidence.extend(success_result)
-                # Cast tool.name to the expected Literal
-                tool_name = cast(Literal["pubmed"], tool.name)
                 sources_searched.append(tool_name)
                 logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))

 """Search handler - orchestrates multiple search tools."""
 import asyncio
+from typing import cast
 import structlog
 from src.tools.base import SearchTool
 from src.utils.exceptions import SearchError
+from src.utils.models import Evidence, SearchResult, SourceName
 logger = structlog.get_logger()
         # Process results
         all_evidence: list[Evidence] = []
+        sources_searched: list[SourceName] = []
         errors: list[str] = []
         for tool, result in zip(self.tools, results, strict=True):
                 success_result = cast(list[Evidence], result)
                 all_evidence.extend(success_result)
+                # Cast tool.name to SourceName (centralized type from models)
+                tool_name = cast(SourceName, tool.name)
                 sources_searched.append(tool_name)
                 logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))

src/utils/models.py CHANGED Viewed

@@ -5,11 +5,14 @@ from typing import Any, ClassVar, Literal
 from pydantic import BaseModel, Field
 class Citation(BaseModel):
     """A citation to a source document."""
-    source: Literal["pubmed"] = Field(description="Where this came from")
     title: str = Field(min_length=1, max_length=500)
     url: str = Field(description="URL to the source")
     date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
@@ -41,7 +44,7 @@ class SearchResult(BaseModel):
     query: str
     evidence: list[Evidence]
-    sources_searched: list[Literal["pubmed"]]
     total_found: int
     errors: list[str] = Field(default_factory=list)

 from pydantic import BaseModel, Field
+# Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
+SourceName = Literal["pubmed", "clinicaltrials"]
 class Citation(BaseModel):
     """A citation to a source document."""
+    source: SourceName = Field(description="Where this came from")
     title: str = Field(min_length=1, max_length=500)
     url: str = Field(description="URL to the source")
     date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
     query: str
     evidence: list[Evidence]
+    sources_searched: list[SourceName]
     total_found: int
     errors: list[str] = Field(default_factory=list)

tests/unit/tools/test_clinicaltrials.py ADDED Viewed

	@@ -0,0 +1,138 @@

+"""Unit tests for ClinicalTrials.gov tool."""
+from collections.abc import Generator
+from typing import Any
+from unittest.mock import MagicMock, patch
+import pytest
+import requests
+from src.tools.clinicaltrials import ClinicalTrialsTool
+from src.utils.exceptions import SearchError
+from src.utils.models import Evidence
+@pytest.fixture
+def mock_clinicaltrials_response() -> dict[str, Any]:
+    """Mock ClinicalTrials.gov API response."""
+    return {
+        "studies": [
+            {
+                "protocolSection": {
+                    "identificationModule": {
+                        "nctId": "NCT04098666",
+                        "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
+                    },
+                    "statusModule": {
+                        "overallStatus": "Recruiting",
+                        "startDateStruct": {"date": "2020-01-15"},
+                    },
+                    "descriptionModule": {
+                        "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
+                    },
+                    "designModule": {"phases": ["PHASE2"]},
+                    "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
+                    "armsInterventionsModule": {
+                        "interventions": [{"name": "Metformin", "type": "Drug"}]
+                    },
+                }
+            }
+        ]
+    }
+@pytest.fixture
+def mock_requests_get(
+    mock_clinicaltrials_response: dict[str, Any],
+) -> Generator[MagicMock, None, None]:
+    """Fixture to mock requests.get with a successful response."""
+    with patch("src.tools.clinicaltrials.requests.get") as mock_get:
+        mock_response = MagicMock()
+        mock_response.json.return_value = mock_clinicaltrials_response
+        mock_response.raise_for_status = MagicMock()
+        mock_get.return_value = mock_response
+        yield mock_get
+class TestClinicalTrialsTool:
+    """Tests for ClinicalTrialsTool."""
+    def test_tool_name(self) -> None:
+        """Tool should have correct name."""
+        tool = ClinicalTrialsTool()
+        assert tool.name == "clinicaltrials"
+    @pytest.mark.asyncio
+    async def test_search_returns_evidence(self, mock_requests_get: MagicMock) -> None:
+        """Search should return Evidence objects."""
+        tool = ClinicalTrialsTool()
+        results = await tool.search("metformin alzheimer", max_results=5)
+        assert len(results) == 1
+        assert isinstance(results[0], Evidence)
+        assert results[0].citation.source == "clinicaltrials"
+        assert "NCT04098666" in results[0].citation.url
+        assert "Metformin" in results[0].citation.title
+    @pytest.mark.asyncio
+    async def test_search_extracts_phase(self, mock_requests_get: MagicMock) -> None:
+        """Search should extract trial phase."""
+        tool = ClinicalTrialsTool()
+        results = await tool.search("metformin alzheimer")
+        assert "PHASE2" in results[0].content
+    @pytest.mark.asyncio
+    async def test_search_extracts_status(self, mock_requests_get: MagicMock) -> None:
+        """Search should extract trial status."""
+        tool = ClinicalTrialsTool()
+        results = await tool.search("metformin alzheimer")
+        assert "Recruiting" in results[0].content
+    @pytest.mark.asyncio
+    async def test_search_empty_results(self) -> None:
+        """Search should handle empty results gracefully."""
+        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
+            mock_response = MagicMock()
+            mock_response.json.return_value = {"studies": []}
+            mock_response.raise_for_status = MagicMock()
+            mock_get.return_value = mock_response
+            tool = ClinicalTrialsTool()
+            results = await tool.search("nonexistent query xyz")
+            assert results == []
+    @pytest.mark.asyncio
+    async def test_search_api_error(self) -> None:
+        """Search should raise SearchError on API failure.
+        Note: We override the retry stop condition so the first failure is final, avoiding the backoff delay between the 3 attempts.
+        """
+        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
+            mock_response = MagicMock()
+            mock_response.raise_for_status.side_effect = requests.HTTPError("500 Server Error")
+            mock_get.return_value = mock_response
+            tool = ClinicalTrialsTool()
+            # Patch the retry decorator's stop condition to fail immediately
+            tool.search.retry.stop = lambda _: True  # type: ignore[attr-defined]
+            with pytest.raises(SearchError):
+                await tool.search("metformin alzheimer")
+class TestClinicalTrialsIntegration:
+    """Integration tests (marked for separate run)."""
+    @pytest.mark.integration
+    @pytest.mark.asyncio
+    async def test_real_api_call(self) -> None:
+        """Test actual API call (requires network)."""
+        tool = ClinicalTrialsTool()
+        results = await tool.search("metformin diabetes", max_results=3)
+        assert len(results) > 0
+        assert all(isinstance(r, Evidence) for r in results)
+        assert all(r.citation.source == "clinicaltrials" for r in results)

uv.lock CHANGED Viewed

@@ -988,6 +988,7 @@ dependencies = [
     { name = "pydantic-ai" },
     { name = "pydantic-settings" },
     { name = "python-dotenv" },
     { name = "structlog" },
     { name = "tenacity" },
     { name = "xmltodict" },
@@ -1033,6 +1034,7 @@ requires-dist = [
     { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
     { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
     { name = "python-dotenv", specifier = ">=1.0" },
     { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
     { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },

     { name = "pydantic-ai" },
     { name = "pydantic-settings" },
     { name = "python-dotenv" },
+    { name = "requests" },
     { name = "structlog" },
     { name = "tenacity" },
     { name = "xmltodict" },
     { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
     { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
     { name = "python-dotenv", specifier = ">=1.0" },
+    { name = "requests", specifier = ">=2.32.5" },
     { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
     { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },