VibecoderMcSwaggins committed on
Commit
17bd211
·
unverified ·
2 Parent(s): cd004e1 1bc9785

Merge pull request #15 from The-Obstacle-Is-The-Way/feat/phase10-clinicaltrials

Browse files
docs/implementation/10_phase_clinicaltrials.md CHANGED
@@ -115,12 +115,28 @@ Evidence(
115
 
116
  ## 4. Implementation
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
119
 
120
  ```python
121
  """ClinicalTrials.gov search tool using API v2."""
122
 
123
- import httpx
 
 
 
124
  from tenacity import retry, stop_after_attempt, wait_exponential
125
 
126
  from src.utils.exceptions import SearchError
@@ -128,10 +144,14 @@ from src.utils.models import Citation, Evidence
128
 
129
 
130
  class ClinicalTrialsTool:
131
- """Search tool for ClinicalTrials.gov."""
 
 
 
 
132
 
133
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
134
- FIELDS = [
135
  "NCTId",
136
  "BriefTitle",
137
  "Phase",
@@ -152,34 +172,33 @@ class ClinicalTrialsTool:
152
  reraise=True,
153
  )
154
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
155
- """
156
- Search ClinicalTrials.gov for studies.
157
-
158
- Args:
159
- query: Search query (e.g., "metformin alzheimer")
160
- max_results: Maximum results to return
161
-
162
- Returns:
163
- List of Evidence objects from clinical trials
164
- """
165
  params = {
166
  "query.term": query,
167
  "pageSize": min(max_results, 100),
168
  "fields": "|".join(self.FIELDS),
169
  }
170
 
171
- async with httpx.AsyncClient(timeout=30.0) as client:
172
- try:
173
- response = await client.get(self.BASE_URL, params=params)
174
- response.raise_for_status()
175
- except httpx.HTTPStatusError as e:
176
- raise SearchError(f"ClinicalTrials.gov search failed: {e}") from e
 
 
 
 
177
 
178
  data = response.json()
179
  studies = data.get("studies", [])
180
-
181
  return [self._study_to_evidence(study) for study in studies[:max_results]]
182
 
 
 
 
 
 
183
  def _study_to_evidence(self, study: dict) -> Evidence:
184
  """Convert a clinical trial study to Evidence."""
185
  # Navigate nested structure
@@ -240,19 +259,23 @@ class ClinicalTrialsTool:
240
 
241
  ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
242
 
 
 
243
  ```python
244
  """Unit tests for ClinicalTrials.gov tool."""
245
 
 
 
246
  import pytest
247
- import respx
248
- from httpx import Response
249
 
250
  from src.tools.clinicaltrials import ClinicalTrialsTool
 
251
  from src.utils.models import Evidence
252
 
253
 
254
  @pytest.fixture
255
- def mock_clinicaltrials_response():
256
  """Mock ClinicalTrials.gov API response."""
257
  return {
258
  "studies": [
@@ -260,26 +283,20 @@ def mock_clinicaltrials_response():
260
  "protocolSection": {
261
  "identificationModule": {
262
  "nctId": "NCT04098666",
263
- "briefTitle": "Metformin in Alzheimer's Dementia Prevention"
264
  },
265
  "statusModule": {
266
  "overallStatus": "Recruiting",
267
- "startDateStruct": {"date": "2020-01-15"}
268
  },
269
  "descriptionModule": {
270
  "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
271
  },
272
- "designModule": {
273
- "phases": ["PHASE2"]
274
- },
275
- "conditionsModule": {
276
- "conditions": ["Alzheimer Disease", "Dementia"]
277
- },
278
  "armsInterventionsModule": {
279
- "interventions": [
280
- {"name": "Metformin", "type": "Drug"}
281
- ]
282
- }
283
  }
284
  }
285
  ]
@@ -289,81 +306,45 @@ def mock_clinicaltrials_response():
289
  class TestClinicalTrialsTool:
290
  """Tests for ClinicalTrialsTool."""
291
 
292
- def test_tool_name(self):
293
  """Tool should have correct name."""
294
  tool = ClinicalTrialsTool()
295
  assert tool.name == "clinicaltrials"
296
 
297
  @pytest.mark.asyncio
298
- @respx.mock
299
- async def test_search_returns_evidence(self, mock_clinicaltrials_response):
 
300
  """Search should return Evidence objects."""
301
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
302
- return_value=Response(200, json=mock_clinicaltrials_response)
303
- )
 
 
304
 
305
- tool = ClinicalTrialsTool()
306
- results = await tool.search("metformin alzheimer", max_results=5)
307
 
308
- assert len(results) == 1
309
- assert isinstance(results[0], Evidence)
310
- assert results[0].citation.source == "clinicaltrials"
311
- assert "NCT04098666" in results[0].citation.url
312
- assert "Metformin" in results[0].citation.title
313
 
314
  @pytest.mark.asyncio
315
- @respx.mock
316
- async def test_search_extracts_phase(self, mock_clinicaltrials_response):
317
- """Search should extract trial phase."""
318
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
319
- return_value=Response(200, json=mock_clinicaltrials_response)
320
- )
321
-
322
- tool = ClinicalTrialsTool()
323
- results = await tool.search("metformin alzheimer")
324
-
325
- assert "PHASE2" in results[0].content
326
-
327
- @pytest.mark.asyncio
328
- @respx.mock
329
- async def test_search_extracts_status(self, mock_clinicaltrials_response):
330
- """Search should extract trial status."""
331
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
332
- return_value=Response(200, json=mock_clinicaltrials_response)
333
- )
334
-
335
- tool = ClinicalTrialsTool()
336
- results = await tool.search("metformin alzheimer")
337
-
338
- assert "Recruiting" in results[0].content
339
-
340
- @pytest.mark.asyncio
341
- @respx.mock
342
- async def test_search_empty_results(self):
343
- """Search should handle empty results gracefully."""
344
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
345
- return_value=Response(200, json={"studies": []})
346
- )
347
-
348
- tool = ClinicalTrialsTool()
349
- results = await tool.search("nonexistent query xyz")
350
-
351
- assert results == []
352
-
353
- @pytest.mark.asyncio
354
- @respx.mock
355
- async def test_search_api_error(self):
356
  """Search should raise SearchError on API failure."""
357
- from src.utils.exceptions import SearchError
358
-
359
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
360
- return_value=Response(500, text="Internal Server Error")
361
- )
 
362
 
363
- tool = ClinicalTrialsTool()
364
 
365
- with pytest.raises(SearchError):
366
- await tool.search("metformin alzheimer")
367
 
368
 
369
  class TestClinicalTrialsIntegration:
@@ -371,7 +352,7 @@ class TestClinicalTrialsIntegration:
371
 
372
  @pytest.mark.integration
373
  @pytest.mark.asyncio
374
- async def test_real_api_call(self):
375
  """Test actual API call (requires network)."""
376
  tool = ClinicalTrialsTool()
377
  results = await tool.search("metformin diabetes", max_results=3)
 
115
 
116
  ## 4. Implementation
117
 
118
+ ### 4.0 Important: HTTP Client Selection
119
+
120
+ **ClinicalTrials.gov's WAF blocks `httpx`'s TLS fingerprint.** Use `requests` instead.
121
+
122
+ | Library | Status | Notes |
123
+ |---------|--------|-------|
124
+ | `httpx` | ❌ 403 Blocked | TLS/JA3 fingerprint flagged |
125
+ | `httpx[http2]` | ❌ 403 Blocked | HTTP/2 doesn't help |
126
+ | `requests` | ✅ Works | Industry standard, not blocked |
127
+ | `urllib` | ✅ Works | Stdlib alternative |
128
+
129
+ We use `requests` wrapped in `asyncio.to_thread()` for async compatibility.
130
+
131
  ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
132
 
133
  ```python
134
  """ClinicalTrials.gov search tool using API v2."""
135
 
136
+ import asyncio
137
+ from typing import Any, ClassVar
138
+
139
+ import requests
140
  from tenacity import retry, stop_after_attempt, wait_exponential
141
 
142
  from src.utils.exceptions import SearchError
 
144
 
145
 
146
  class ClinicalTrialsTool:
147
+ """Search tool for ClinicalTrials.gov.
148
+
149
+ Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
150
+ WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
151
+ """
152
 
153
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
154
+ FIELDS: ClassVar[list[str]] = [
155
  "NCTId",
156
  "BriefTitle",
157
  "Phase",
 
172
  reraise=True,
173
  )
174
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
175
+ """Search ClinicalTrials.gov for studies."""
 
 
 
 
 
 
 
 
 
176
  params = {
177
  "query.term": query,
178
  "pageSize": min(max_results, 100),
179
  "fields": "|".join(self.FIELDS),
180
  }
181
 
182
+ try:
183
+ # Run blocking requests.get in a separate thread for async compatibility
184
+ response = await asyncio.to_thread(
185
+ requests.get,
186
+ self.BASE_URL,
187
+ params=params,
188
+ headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
189
+ timeout=30,
190
+ )
191
+ response.raise_for_status()
192
 
193
  data = response.json()
194
  studies = data.get("studies", [])
 
195
  return [self._study_to_evidence(study) for study in studies[:max_results]]
196
 
197
+ except requests.HTTPError as e:
198
+ raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
199
+ except requests.RequestException as e:
200
+ raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
201
+
202
  def _study_to_evidence(self, study: dict) -> Evidence:
203
  """Convert a clinical trial study to Evidence."""
204
  # Navigate nested structure
 
259
 
260
  ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
261
 
262
+ Uses `unittest.mock.patch` to mock `requests.get` (not `respx` since we're not using `httpx`).
263
+
264
  ```python
265
  """Unit tests for ClinicalTrials.gov tool."""
266
 
267
+ from unittest.mock import MagicMock, patch
268
+
269
  import pytest
270
+ import requests
 
271
 
272
  from src.tools.clinicaltrials import ClinicalTrialsTool
273
+ from src.utils.exceptions import SearchError
274
  from src.utils.models import Evidence
275
 
276
 
277
  @pytest.fixture
278
+ def mock_clinicaltrials_response() -> dict:
279
  """Mock ClinicalTrials.gov API response."""
280
  return {
281
  "studies": [
 
283
  "protocolSection": {
284
  "identificationModule": {
285
  "nctId": "NCT04098666",
286
+ "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
287
  },
288
  "statusModule": {
289
  "overallStatus": "Recruiting",
290
+ "startDateStruct": {"date": "2020-01-15"},
291
  },
292
  "descriptionModule": {
293
  "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
294
  },
295
+ "designModule": {"phases": ["PHASE2"]},
296
+ "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
 
 
 
 
297
  "armsInterventionsModule": {
298
+ "interventions": [{"name": "Metformin", "type": "Drug"}]
299
+ },
 
 
300
  }
301
  }
302
  ]
 
306
  class TestClinicalTrialsTool:
307
  """Tests for ClinicalTrialsTool."""
308
 
309
+ def test_tool_name(self) -> None:
310
  """Tool should have correct name."""
311
  tool = ClinicalTrialsTool()
312
  assert tool.name == "clinicaltrials"
313
 
314
  @pytest.mark.asyncio
315
+ async def test_search_returns_evidence(
316
+ self, mock_clinicaltrials_response: dict
317
+ ) -> None:
318
  """Search should return Evidence objects."""
319
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
320
+ mock_response = MagicMock()
321
+ mock_response.json.return_value = mock_clinicaltrials_response
322
+ mock_response.raise_for_status = MagicMock()
323
+ mock_get.return_value = mock_response
324
 
325
+ tool = ClinicalTrialsTool()
326
+ results = await tool.search("metformin alzheimer", max_results=5)
327
 
328
+ assert len(results) == 1
329
+ assert isinstance(results[0], Evidence)
330
+ assert results[0].citation.source == "clinicaltrials"
331
+ assert "NCT04098666" in results[0].citation.url
332
+ assert "Metformin" in results[0].citation.title
333
 
334
  @pytest.mark.asyncio
335
+ async def test_search_api_error(self) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  """Search should raise SearchError on API failure."""
337
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
338
+ mock_response = MagicMock()
339
+ mock_response.raise_for_status.side_effect = requests.HTTPError(
340
+ "500 Server Error"
341
+ )
342
+ mock_get.return_value = mock_response
343
 
344
+ tool = ClinicalTrialsTool()
345
 
346
+ with pytest.raises(SearchError):
347
+ await tool.search("metformin alzheimer")
348
 
349
 
350
  class TestClinicalTrialsIntegration:
 
352
 
353
  @pytest.mark.integration
354
  @pytest.mark.asyncio
355
+ async def test_real_api_call(self) -> None:
356
  """Test actual API call (requires network)."""
357
  tool = ClinicalTrialsTool()
358
  results = await tool.search("metformin diabetes", max_results=3)
examples/search_demo/run_search.py CHANGED
@@ -2,8 +2,9 @@
2
  """
3
  Demo: Search for drug repurposing evidence.
4
 
5
- This script demonstrates Phase 2 functionality:
6
  - PubMed search (biomedical literature)
 
7
  - SearchHandler (parallel scatter-gather orchestration)
8
 
9
  Usage:
@@ -20,6 +21,7 @@ Requirements:
20
  import asyncio
21
  import sys
22
 
 
23
  from src.tools.pubmed import PubMedTool
24
  from src.tools.search_handler import SearchHandler
25
 
@@ -33,10 +35,11 @@ async def main(query: str) -> None:
33
 
34
  # Initialize tools
35
  pubmed = PubMedTool()
36
- handler = SearchHandler(tools=[pubmed], timeout=30.0)
 
37
 
38
  # Execute search
39
- print("Searching PubMed in parallel...")
40
  result = await handler.execute(query, max_results_per_tool=5)
41
 
42
  # Display results
 
2
  """
3
  Demo: Search for drug repurposing evidence.
4
 
5
+ This script demonstrates multi-source search functionality:
6
  - PubMed search (biomedical literature)
7
+ - ClinicalTrials.gov search (clinical trial evidence)
8
  - SearchHandler (parallel scatter-gather orchestration)
9
 
10
  Usage:
 
21
  import asyncio
22
  import sys
23
 
24
+ from src.tools.clinicaltrials import ClinicalTrialsTool
25
  from src.tools.pubmed import PubMedTool
26
  from src.tools.search_handler import SearchHandler
27
 
 
35
 
36
  # Initialize tools
37
  pubmed = PubMedTool()
38
+ trials = ClinicalTrialsTool()
39
+ handler = SearchHandler(tools=[pubmed, trials], timeout=30.0)
40
 
41
  # Execute search
42
+ print("Searching PubMed and ClinicalTrials.gov in parallel...")
43
  result = await handler.execute(query, max_results_per_tool=5)
44
 
45
  # Display results
pyproject.toml CHANGED
@@ -7,25 +7,22 @@ requires-python = ">=3.11"
7
  dependencies = [
8
  # Core
9
  "pydantic>=2.7",
10
- "pydantic-settings>=2.2", # For BaseSettings (config)
11
- "pydantic-ai>=0.0.16", # Agent framework
12
-
13
  # AI Providers
14
  "openai>=1.0.0",
15
  "anthropic>=0.18.0",
16
-
17
  # HTTP & Parsing
18
- "httpx>=0.27", # Async HTTP client
19
- "beautifulsoup4>=4.12", # HTML parsing
20
- "xmltodict>=0.13", # PubMed XML -> dict
21
-
22
  # UI
23
- "gradio>=5.0", # Chat interface
24
-
25
  # Utils
26
- "python-dotenv>=1.0", # .env loading
27
- "tenacity>=8.2", # Retry logic
28
- "structlog>=24.1", # Structured logging
 
29
  ]
30
 
31
  [project.optional-dependencies]
 
7
  dependencies = [
8
  # Core
9
  "pydantic>=2.7",
10
+ "pydantic-settings>=2.2", # For BaseSettings (config)
11
+ "pydantic-ai>=0.0.16", # Agent framework
 
12
  # AI Providers
13
  "openai>=1.0.0",
14
  "anthropic>=0.18.0",
 
15
  # HTTP & Parsing
16
+ "httpx>=0.27", # Async HTTP client (PubMed)
17
+ "beautifulsoup4>=4.12", # HTML parsing
18
+ "xmltodict>=0.13", # PubMed XML -> dict
 
19
  # UI
20
+ "gradio>=5.0", # Chat interface
 
21
  # Utils
22
+ "python-dotenv>=1.0", # .env loading
23
+ "tenacity>=8.2", # Retry logic
24
+ "structlog>=24.1", # Structured logging
25
+ "requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
26
  ]
27
 
28
  [project.optional-dependencies]
src/app.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
 
9
  from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
10
  from src.orchestrator_factory import create_orchestrator
 
11
  from src.tools.pubmed import PubMedTool
12
  from src.tools.search_handler import SearchHandler
13
  from src.utils.models import OrchestratorConfig
@@ -32,7 +33,7 @@ def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
32
 
33
  # Create search tools
34
  search_handler = SearchHandler(
35
- tools=[PubMedTool()],
36
  timeout=config.search_timeout,
37
  )
38
 
@@ -127,7 +128,7 @@ def create_demo() -> Any:
127
  ## AI-Powered Drug Repurposing Research Agent
128
 
129
  Ask questions about potential drug repurposing opportunities.
130
- The agent will search PubMed, evaluate evidence, and provide recommendations.
131
 
132
  **Example questions:**
133
  - "What drugs could be repurposed for Alzheimer's disease?"
@@ -160,7 +161,7 @@ def create_demo() -> Any:
160
  **Note**: This is a research tool and should not be used for medical decisions.
161
  Always consult healthcare professionals for medical advice.
162
 
163
- Built with 🤖 PydanticAI + 🔬 PubMed
164
  """)
165
 
166
  return demo
 
8
 
9
  from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
10
  from src.orchestrator_factory import create_orchestrator
11
+ from src.tools.clinicaltrials import ClinicalTrialsTool
12
  from src.tools.pubmed import PubMedTool
13
  from src.tools.search_handler import SearchHandler
14
  from src.utils.models import OrchestratorConfig
 
33
 
34
  # Create search tools
35
  search_handler = SearchHandler(
36
+ tools=[PubMedTool(), ClinicalTrialsTool()],
37
  timeout=config.search_timeout,
38
  )
39
 
 
128
  ## AI-Powered Drug Repurposing Research Agent
129
 
130
  Ask questions about potential drug repurposing opportunities.
131
+ The agent searches PubMed & ClinicalTrials.gov to provide recommendations.
132
 
133
  **Example questions:**
134
  - "What drugs could be repurposed for Alzheimer's disease?"
 
161
  **Note**: This is a research tool and should not be used for medical decisions.
162
  Always consult healthcare professionals for medical advice.
163
 
164
+ Built with 🤖 PydanticAI + 🔬 PubMed & ClinicalTrials.gov
165
  """)
166
 
167
  return demo
src/tools/clinicaltrials.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClinicalTrials.gov search tool using API v2."""
2
+
3
+ import asyncio
4
+ from typing import Any, ClassVar
5
+
6
+ import requests
7
+ from tenacity import retry, stop_after_attempt, wait_exponential
8
+
9
+ from src.utils.exceptions import SearchError
10
+ from src.utils.models import Citation, Evidence
11
+
12
+
13
class ClinicalTrialsTool:
    """Search tool for ClinicalTrials.gov (API v2).

    Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
    WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
    See: https://clinicaltrials.gov/data-api/api
    """

    BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
    # Study fields requested from the API; sent as one "|"-joined "fields" param.
    FIELDS: ClassVar[list[str]] = [
        "NCTId",
        "BriefTitle",
        "Phase",
        "OverallStatus",
        "Condition",
        "InterventionName",
        "StartDate",
        "BriefSummary",
    ]

    @property
    def name(self) -> str:
        """Return the identifier of this search tool."""
        return "clinicaltrials"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search ClinicalTrials.gov for studies.

        Args:
            query: Search query (e.g., "metformin alzheimer")
            max_results: Maximum results to return (max 100)

        Returns:
            List of Evidence objects from clinical trials. Empty when
            ``max_results`` is not positive or the API returns no studies.

        Raises:
            SearchError: If the request fails or the API answers with an
                HTTP error status (after the retry attempts are exhausted).
        """
        if max_results <= 0:
            # Nothing was asked for: skip the round trip instead of sending a
            # non-positive pageSize to the API.
            return []

        params: dict[str, str | int] = {
            "query.term": query,
            "pageSize": min(max_results, 100),
            "fields": "|".join(self.FIELDS),
        }

        try:
            # Run blocking requests.get in a separate thread for async compatibility
            response = await asyncio.to_thread(
                requests.get,
                self.BASE_URL,
                params=params,
                headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except requests.HTTPError as e:
            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
        except requests.RequestException as e:
            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e

        # "studies" may be absent or null when nothing matched.
        studies = data.get("studies") or []
        return [self._study_to_evidence(study) for study in studies[:max_results]]

    def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
        """Convert a clinical trial study to Evidence.

        Args:
            study: One entry of the API's "studies" list.

        Returns:
            Evidence whose content is the (possibly truncated) brief summary
            followed by phase, status, conditions and interventions, and whose
            citation links to the study page on ClinicalTrials.gov.
        """
        # Navigate nested structure
        protocol = study.get("protocolSection", {})
        id_module = protocol.get("identificationModule", {})
        status_module = protocol.get("statusModule", {})
        desc_module = protocol.get("descriptionModule", {})
        design_module = protocol.get("designModule", {})
        conditions_module = protocol.get("conditionsModule", {})
        arms_module = protocol.get("armsInterventionsModule", {})

        nct_id = id_module.get("nctId", "Unknown")
        # Citation.title requires at least one character, so an empty title
        # must fall back to the placeholder as well.
        title = id_module.get("briefTitle") or "Untitled Study"
        status = status_module.get("overallStatus", "Unknown")
        start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")

        # A trial can span several phases; report all of them, not only the first.
        phases = design_module.get("phases") or []
        phase = "/".join(phases) if phases else "Not Applicable"

        # Get conditions (first three only, to keep the content short)
        conditions = conditions_module.get("conditions") or []
        conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"

        # Get interventions (first three only), skipping entries without a name
        interventions = arms_module.get("interventions") or []
        intervention_names = [
            intervention_name
            for intervention in interventions[:3]
            if (intervention_name := intervention.get("name"))
        ]
        interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"

        # Get summary; add an ellipsis only when text was actually cut off
        summary = desc_module.get("briefSummary") or "No summary available."
        if len(summary) > 500:
            summary = f"{summary[:500]}..."

        # Build content with key trial info
        content = (
            f"{summary} "
            f"Trial Phase: {phase}. "
            f"Status: {status}. "
            f"Conditions: {conditions_str}. "
            f"Interventions: {interventions_str}."
        )

        return Evidence(
            content=content[:2000],
            citation=Citation(
                source="clinicaltrials",
                title=title[:500],
                url=f"https://clinicaltrials.gov/study/{nct_id}",
                date=start_date,
                authors=[],  # Trials don't have traditional authors
            ),
            relevance=0.85,  # Trials are highly relevant for repurposing
        )
src/tools/search_handler.py CHANGED
@@ -1,13 +1,13 @@
1
  """Search handler - orchestrates multiple search tools."""
2
 
3
  import asyncio
4
- from typing import Literal, cast
5
 
6
  import structlog
7
 
8
  from src.tools.base import SearchTool
9
  from src.utils.exceptions import SearchError
10
- from src.utils.models import Evidence, SearchResult
11
 
12
  logger = structlog.get_logger()
13
 
@@ -49,7 +49,7 @@ class SearchHandler:
49
 
50
  # Process results
51
  all_evidence: list[Evidence] = []
52
- sources_searched: list[Literal["pubmed"]] = []
53
  errors: list[str] = []
54
 
55
  for tool, result in zip(self.tools, results, strict=True):
@@ -61,8 +61,8 @@ class SearchHandler:
61
  success_result = cast(list[Evidence], result)
62
  all_evidence.extend(success_result)
63
 
64
- # Cast tool.name to the expected Literal
65
- tool_name = cast(Literal["pubmed"], tool.name)
66
  sources_searched.append(tool_name)
67
  logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))
68
 
 
1
  """Search handler - orchestrates multiple search tools."""
2
 
3
  import asyncio
4
+ from typing import cast
5
 
6
  import structlog
7
 
8
  from src.tools.base import SearchTool
9
  from src.utils.exceptions import SearchError
10
+ from src.utils.models import Evidence, SearchResult, SourceName
11
 
12
  logger = structlog.get_logger()
13
 
 
49
 
50
  # Process results
51
  all_evidence: list[Evidence] = []
52
+ sources_searched: list[SourceName] = []
53
  errors: list[str] = []
54
 
55
  for tool, result in zip(self.tools, results, strict=True):
 
61
  success_result = cast(list[Evidence], result)
62
  all_evidence.extend(success_result)
63
 
64
+ # Cast tool.name to SourceName (centralized type from models)
65
+ tool_name = cast(SourceName, tool.name)
66
  sources_searched.append(tool_name)
67
  logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))
68
 
src/utils/models.py CHANGED
@@ -5,11 +5,14 @@ from typing import Any, ClassVar, Literal
5
 
6
  from pydantic import BaseModel, Field
7
 
 
 
 
8
 
9
  class Citation(BaseModel):
10
  """A citation to a source document."""
11
 
12
- source: Literal["pubmed"] = Field(description="Where this came from")
13
  title: str = Field(min_length=1, max_length=500)
14
  url: str = Field(description="URL to the source")
15
  date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
@@ -41,7 +44,7 @@ class SearchResult(BaseModel):
41
 
42
  query: str
43
  evidence: list[Evidence]
44
- sources_searched: list[Literal["pubmed"]]
45
  total_found: int
46
  errors: list[str] = Field(default_factory=list)
47
 
 
5
 
6
  from pydantic import BaseModel, Field
7
 
8
+ # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11)
9
+ SourceName = Literal["pubmed", "clinicaltrials"]
10
+
11
 
12
  class Citation(BaseModel):
13
  """A citation to a source document."""
14
 
15
+ source: SourceName = Field(description="Where this came from")
16
  title: str = Field(min_length=1, max_length=500)
17
  url: str = Field(description="URL to the source")
18
  date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
 
44
 
45
  query: str
46
  evidence: list[Evidence]
47
+ sources_searched: list[SourceName]
48
  total_found: int
49
  errors: list[str] = Field(default_factory=list)
50
 
tests/unit/tools/test_clinicaltrials.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for ClinicalTrials.gov tool."""
2
+
3
+ from collections.abc import Generator
4
+ from typing import Any
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pytest
8
+ import requests
9
+
10
+ from src.tools.clinicaltrials import ClinicalTrialsTool
11
+ from src.utils.exceptions import SearchError
12
+ from src.utils.models import Evidence
13
+
14
+
15
@pytest.fixture
def mock_clinicaltrials_response() -> dict[str, Any]:
    """Provide a canned ClinicalTrials.gov API payload containing one study."""
    # The single study's protocol section, laid out module by module.
    protocol_section: dict[str, Any] = {
        "identificationModule": {
            "nctId": "NCT04098666",
            "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
        },
        "statusModule": {
            "overallStatus": "Recruiting",
            "startDateStruct": {"date": "2020-01-15"},
        },
        "descriptionModule": {
            "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
        },
        "designModule": {"phases": ["PHASE2"]},
        "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
        "armsInterventionsModule": {
            "interventions": [{"name": "Metformin", "type": "Drug"}]
        },
    }
    return {"studies": [{"protocolSection": protocol_section}]}
42
+
43
+
44
@pytest.fixture
def mock_requests_get(
    mock_clinicaltrials_response: dict[str, Any],
) -> Generator[MagicMock, None, None]:
    """Patch ``requests.get`` in the tool module to return a successful response.

    Yields the patched ``requests.get`` mock so tests can inspect its calls.
    """
    # Prepare the fake response first, then install it as the patch's return value.
    fake_response = MagicMock()
    fake_response.json.return_value = mock_clinicaltrials_response
    fake_response.raise_for_status = MagicMock()

    with patch(
        "src.tools.clinicaltrials.requests.get", return_value=fake_response
    ) as patched_get:
        yield patched_get
55
+
56
+
57
class TestClinicalTrialsTool:
    """Tests for ClinicalTrialsTool."""

    def test_tool_name(self) -> None:
        """Tool should have correct name."""
        tool = ClinicalTrialsTool()
        assert tool.name == "clinicaltrials"

    @pytest.mark.asyncio
    async def test_search_returns_evidence(self, mock_requests_get: MagicMock) -> None:
        """Search should return Evidence objects."""
        tool = ClinicalTrialsTool()
        results = await tool.search("metformin alzheimer", max_results=5)

        assert len(results) == 1
        assert isinstance(results[0], Evidence)
        assert results[0].citation.source == "clinicaltrials"
        assert "NCT04098666" in results[0].citation.url
        assert "Metformin" in results[0].citation.title

        # The query and page size must reach the HTTP layer unchanged.
        mock_requests_get.assert_called_once()
        sent_params = mock_requests_get.call_args.kwargs["params"]
        assert sent_params["query.term"] == "metformin alzheimer"
        assert sent_params["pageSize"] == 5

    @pytest.mark.asyncio
    async def test_search_extracts_phase(self, mock_requests_get: MagicMock) -> None:
        """Search should extract trial phase."""
        tool = ClinicalTrialsTool()
        results = await tool.search("metformin alzheimer")

        assert "PHASE2" in results[0].content

    @pytest.mark.asyncio
    async def test_search_extracts_status(self, mock_requests_get: MagicMock) -> None:
        """Search should extract trial status."""
        tool = ClinicalTrialsTool()
        results = await tool.search("metformin alzheimer")

        assert "Recruiting" in results[0].content

    @pytest.mark.asyncio
    async def test_search_empty_results(self) -> None:
        """Search should handle empty results gracefully."""
        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.json.return_value = {"studies": []}
            mock_response.raise_for_status = MagicMock()
            mock_get.return_value = mock_response

            tool = ClinicalTrialsTool()
            results = await tool.search("nonexistent query xyz")

            assert results == []

    @pytest.mark.asyncio
    async def test_search_api_error(self) -> None:
        """Search should raise SearchError on API failure.

        Note: The retry stop condition is overridden so the call fails on the
        first attempt instead of waiting through the backoff delays. The
        override is applied with ``patch.object`` so it is undone when the
        test ends; the retry controller hangs off the decorated method and is
        shared by every ClinicalTrialsTool instance, so a plain assignment
        would leak into other tests.
        """
        tool = ClinicalTrialsTool()
        retry_controller = tool.search.retry  # type: ignore[attr-defined]

        with patch("src.tools.clinicaltrials.requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.raise_for_status.side_effect = requests.HTTPError("500 Server Error")
            mock_get.return_value = mock_response

            # Stop after the first failed attempt, for this test only.
            with patch.object(retry_controller, "stop", lambda _: True):
                with pytest.raises(SearchError):
                    await tool.search("metformin alzheimer")

            # Exactly one HTTP attempt proves no retry took place.
            mock_get.assert_called_once()
124
+
125
+
126
class TestClinicalTrialsIntegration:
    """Integration tests (marked for separate run)."""

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_real_api_call(self) -> None:
        """Hit the live ClinicalTrials.gov API (requires network)."""
        live_tool = ClinicalTrialsTool()
        found = await live_tool.search("metformin diabetes", max_results=3)

        # At least one study must come back, and each must be well-formed.
        assert len(found) > 0
        assert all(isinstance(item, Evidence) for item in found)
        assert all(item.citation.source == "clinicaltrials" for item in found)
uv.lock CHANGED
@@ -988,6 +988,7 @@ dependencies = [
988
  { name = "pydantic-ai" },
989
  { name = "pydantic-settings" },
990
  { name = "python-dotenv" },
 
991
  { name = "structlog" },
992
  { name = "tenacity" },
993
  { name = "xmltodict" },
@@ -1033,6 +1034,7 @@ requires-dist = [
1033
  { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
1034
  { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
1035
  { name = "python-dotenv", specifier = ">=1.0" },
 
1036
  { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
1037
  { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
1038
  { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },
 
988
  { name = "pydantic-ai" },
989
  { name = "pydantic-settings" },
990
  { name = "python-dotenv" },
991
+ { name = "requests" },
992
  { name = "structlog" },
993
  { name = "tenacity" },
994
  { name = "xmltodict" },
 
1034
  { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
1035
  { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
1036
  { name = "python-dotenv", specifier = ">=1.0" },
1037
+ { name = "requests", specifier = ">=2.32.5" },
1038
  { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
1039
  { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
1040
  { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },