VibecoderMcSwaggins committed
Commit 25c3a8b Β· 1 Parent(s): 9760706

feat(phase4): Orchestrator and UI complete

Files changed (6)
  1. Dockerfile +33 -0
  2. README.md +54 -4
  3. src/app.py +153 -0
  4. src/orchestrator.py +336 -0
  5. src/utils/models.py +46 -1
  6. tests/unit/test_orchestrator.py +285 -0
Dockerfile ADDED
@@ -0,0 +1,33 @@
+ # Dockerfile for DeepCritical
+ FROM python:3.11-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install uv
+ RUN pip install uv
+
+ # Copy project files
+ COPY pyproject.toml .
+ COPY uv.lock .
+ COPY src/ src/
+ COPY README.md .
+
+ # Install dependencies
+ RUN uv sync --frozen --no-dev
+
+ # Expose port
+ EXPOSE 7860
+
+ # Set environment variables
+ ENV GRADIO_SERVER_NAME=0.0.0.0
+ ENV GRADIO_SERVER_PORT=7860
+ ENV PYTHONPATH=/app
+
+ # Run the app
+ CMD ["uv", "run", "python", "-m", "src.app"]
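For local verification, the image can be built and run along these lines (the `deepcritical` tag and the example key are illustrative, not part of this commit):

```bash
# Build the image from the repository root
docker build -t deepcritical .

# Run it, publishing the Gradio port. Without an OPENAI_API_KEY or
# ANTHROPIC_API_KEY, src/app.py falls back to the MockJudgeHandler.
docker run -p 7860:7860 -e OPENAI_API_KEY=sk-... deepcritical
```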
README.md CHANGED
@@ -1,15 +1,65 @@
1
  ---
2
  title: DeepCritical
3
- emoji: πŸ“ˆ
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 6.0.0
8
  app_file: src/app.py
9
  pinned: false
10
  license: mit
11
- short_description: Deep Search for Critical Research [BigData] -> [Actionable]
12
  ---
13
 
14
- ### DeepCritical
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
1
  ---
2
  title: DeepCritical
3
+ emoji: 🧬
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.0.0
8
  app_file: src/app.py
9
  pinned: false
10
  license: mit
 
11
  ---
12
 
13
+ # DeepCritical
14
+
15
+ AI-Powered Drug Repurposing Research Agent
16
+
17
+ ## Quick Start
18
+
19
+ ### 1. Environment Setup
20
+
21
+ ```bash
22
+ # Install uv if you haven't already
23
+ pip install uv
24
+
25
+ # Sync dependencies
26
+ uv sync
27
+ ```
28
+
29
+ ### 2. Run the UI
30
+
31
+ ```bash
32
+ # Start the Gradio app
33
+ uv run python -m src.app
34
+ ```
35
+
36
+ Open your browser to `http://localhost:7860`.
37
+
38
+ ## Development
39
+
40
+ ### Run Tests
41
+
42
+ ```bash
43
+ uv run pytest
44
+ ```
45
+
46
+ ### Run Checks
47
+
48
+ ```bash
49
+ make check
50
+ ```
51
+
52
+ ## Architecture
53
+
54
+ DeepCritical uses a Vertical Slice Architecture:
55
+
56
+ 1. **Search Slice**: Retrieving evidence from PubMed and the Web.
57
+ 2. **Judge Slice**: Evaluating evidence quality using LLMs.
58
+ 3. **Orchestrator Slice**: Managing the research loop and UI.
59
+
60
+ Built with:
61
+ - **PydanticAI**: For robust agent interactions.
62
+ - **Gradio**: For the streaming user interface.
63
+ - **PubMed**: For biomedical literature.
64
+ - **DuckDuckGo**: For general web search.
65
 
src/app.py CHANGED
@@ -0,0 +1,153 @@
+ """Gradio UI for DeepCritical agent."""
+
+ import os
+ from collections.abc import AsyncGenerator
+ from typing import Any
+
+ import gradio as gr
+
+ from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
+ from src.orchestrator import Orchestrator
+ from src.tools.pubmed import PubMedTool
+ from src.tools.search_handler import SearchHandler
+ from src.tools.websearch import WebTool
+ from src.utils.models import OrchestratorConfig
+
+
+ def create_orchestrator(use_mock: bool = False) -> Orchestrator:
+     """
+     Create an orchestrator instance.
+
+     Args:
+         use_mock: If True, use MockJudgeHandler (no API key needed)
+
+     Returns:
+         Configured Orchestrator instance
+     """
+     # Create search tools
+     search_handler = SearchHandler(
+         tools=[PubMedTool(), WebTool()],
+         timeout=30.0,
+     )
+
+     # Create judge (mock or real)
+     judge_handler: JudgeHandler | MockJudgeHandler
+     if use_mock:
+         judge_handler = MockJudgeHandler()
+     else:
+         judge_handler = JudgeHandler()
+
+     # Create orchestrator
+     config = OrchestratorConfig(
+         max_iterations=5,
+         max_results_per_tool=10,
+     )
+
+     return Orchestrator(
+         search_handler=search_handler,
+         judge_handler=judge_handler,
+         config=config,
+     )
+
+
+ async def research_agent(
+     message: str,
+     history: list[dict[str, Any]],
+ ) -> AsyncGenerator[str, None]:
+     """
+     Gradio chat function that runs the research agent.
+
+     Args:
+         message: User's research question
+         history: Chat history (Gradio format)
+
+     Yields:
+         Markdown-formatted responses for streaming
+     """
+     if not message.strip():
+         yield "Please enter a research question."
+         return
+
+     # Create orchestrator (use mock if no API key)
+     use_mock = not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"))
+     orchestrator = create_orchestrator(use_mock=use_mock)
+
+     # Run the agent and stream events
+     response_parts = []
+
+     try:
+         async for event in orchestrator.run(message):
+             # Format event as markdown
+             event_md = event.to_markdown()
+             response_parts.append(event_md)
+
+             # If complete, show full response
+             if event.type == "complete":
+                 yield event.message
+             else:
+                 # Show progress
+                 yield "\n\n".join(response_parts)
+
+     except Exception as e:
+         yield f"❌ **Error**: {e!s}"
+
+
+ def create_demo() -> Any:
+     """
+     Create the Gradio demo interface.
+
+     Returns:
+         Configured Gradio Blocks interface
+     """
+     with gr.Blocks(
+         title="DeepCritical - Drug Repurposing Research Agent",
+         theme=gr.themes.Soft(),
+     ) as demo:
+         gr.Markdown("""
+         # 🧬 DeepCritical
+         ## AI-Powered Drug Repurposing Research Agent
+
+         Ask questions about potential drug repurposing opportunities.
+         The agent will search PubMed and the web, evaluate evidence, and provide recommendations.
+
+         **Example questions:**
+         - "What drugs could be repurposed for Alzheimer's disease?"
+         - "Is metformin effective for cancer treatment?"
+         - "What existing medications show promise for Long COVID?"
+         """)
+
+         gr.ChatInterface(
+             fn=research_agent,
+             type="messages",
+             title="",
+             examples=[
+                 "What drugs could be repurposed for Alzheimer's disease?",
+                 "Is metformin effective for treating cancer?",
+                 "What medications show promise for Long COVID treatment?",
+                 "Can statins be repurposed for neurological conditions?",
+             ],
+         )
+
+         gr.Markdown("""
+         ---
+         **Note**: This is a research tool and should not be used for medical decisions.
+         Always consult healthcare professionals for medical advice.
+
+         Built with πŸ€– PydanticAI + πŸ”¬ PubMed + πŸ¦† DuckDuckGo
+         """)
+
+     return demo
+
+
+ def main() -> None:
+     """Run the Gradio app."""
+     demo = create_demo()
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+     )
+
+
+ if __name__ == "__main__":
+     main()
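`research_agent` follows Gradio's streaming convention: each yielded string replaces the message currently shown, which is why the progress events are re-joined cumulatively on every yield. It can also be driven without the UI for debugging; a hypothetical snippet:

```python
import asyncio

from src.app import research_agent


async def debug() -> None:
    # history is only part of the Gradio signature; it is not read here
    async for chunk in research_agent("Is metformin effective for cancer treatment?", []):
        print(chunk)
        print("---")


asyncio.run(debug())
```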
src/orchestrator.py CHANGED
@@ -0,0 +1,336 @@
+ """Orchestrator - the agent loop connecting Search and Judge."""
+
+ import asyncio
+ from collections.abc import AsyncGenerator
+ from typing import Any, Protocol
+
+ import structlog
+
+ from src.utils.models import (
+     AgentEvent,
+     Evidence,
+     JudgeAssessment,
+     OrchestratorConfig,
+     SearchResult,
+ )
+
+ logger = structlog.get_logger()
+
+
+ class SearchHandlerProtocol(Protocol):
+     """Protocol for search handler."""
+
+     async def execute(self, query: str, max_results_per_tool: int = 10) -> SearchResult: ...
+
+
+ class JudgeHandlerProtocol(Protocol):
+     """Protocol for judge handler."""
+
+     async def assess(self, question: str, evidence: list[Evidence]) -> JudgeAssessment: ...
+
+
+ class Orchestrator:
+     """
+     The agent orchestrator - runs the Search -> Judge -> Loop cycle.
+
+     This is a generator-based design that yields events for real-time UI updates.
+     """
+
+     def __init__(
+         self,
+         search_handler: SearchHandlerProtocol,
+         judge_handler: JudgeHandlerProtocol,
+         config: OrchestratorConfig | None = None,
+     ):
+         """
+         Initialize the orchestrator.
+
+         Args:
+             search_handler: Handler for executing searches
+             judge_handler: Handler for assessing evidence
+             config: Optional configuration (uses defaults if not provided)
+         """
+         self.search = search_handler
+         self.judge = judge_handler
+         self.config = config or OrchestratorConfig()
+         self.history: list[dict[str, Any]] = []
+
+     async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
+         """
+         Run the agent loop for a query.
+
+         Yields AgentEvent objects for each step, allowing real-time UI updates.
+
+         Args:
+             query: The user's research question
+
+         Yields:
+             AgentEvent objects for each step of the process
+         """
+         logger.info("Starting orchestrator", query=query)
+
+         yield AgentEvent(
+             type="started",
+             message=f"Starting research for: {query}",
+             iteration=0,
+         )
+
+         all_evidence: list[Evidence] = []
+         current_queries = [query]
+         iteration = 0
+
+         while iteration < self.config.max_iterations:
+             iteration += 1
+             logger.info("Iteration", iteration=iteration, queries=current_queries)
+
+             # === SEARCH PHASE ===
+             yield AgentEvent(
+                 type="searching",
+                 message=f"Searching for: {', '.join(current_queries[:3])}...",
+                 iteration=iteration,
+             )
+
+             try:
+                 # Execute searches for all current queries
+                 search_tasks = [
+                     self.search.execute(q, self.config.max_results_per_tool)
+                     for q in current_queries[:3]  # Limit to 3 queries per iteration
+                 ]
+                 search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
+
+                 # Collect evidence from successful searches
+                 new_evidence: list[Evidence] = []
+                 errors: list[str] = []
+
+                 for q, result in zip(current_queries[:3], search_results, strict=False):
+                     if isinstance(result, Exception):
+                         errors.append(f"Search for '{q}' failed: {result!s}")
+                     elif isinstance(result, SearchResult):
+                         new_evidence.extend(result.evidence)
+                         errors.extend(result.errors)
+                     else:
+                         # Should not happen with return_exceptions=True but safe fallback
+                         errors.append(f"Unknown result type for '{q}': {type(result)}")
+
+                 # Deduplicate evidence by URL
+                 seen_urls = {e.citation.url for e in all_evidence}
+                 unique_new = [e for e in new_evidence if e.citation.url not in seen_urls]
+                 all_evidence.extend(unique_new)
+
+                 yield AgentEvent(
+                     type="search_complete",
+                     message=f"Found {len(unique_new)} new sources ({len(all_evidence)} total)",
+                     data={
+                         "new_count": len(unique_new),
+                         "total_count": len(all_evidence),
+                     },
+                     iteration=iteration,
+                 )
+
+                 if errors:
+                     logger.warning("Search errors", errors=errors)
+
+             except Exception as e:
+                 logger.error("Search phase failed", error=str(e))
+                 yield AgentEvent(
+                     type="error",
+                     message=f"Search failed: {e!s}",
+                     iteration=iteration,
+                 )
+                 continue
+
+             # === JUDGE PHASE ===
+             yield AgentEvent(
+                 type="judging",
+                 message=f"Evaluating {len(all_evidence)} sources...",
+                 iteration=iteration,
+             )
+
+             try:
+                 assessment = await self.judge.assess(query, all_evidence)
+
+                 yield AgentEvent(
+                     type="judge_complete",
+                     message=(
+                         f"Assessment: {assessment.recommendation} "
+                         f"(confidence: {assessment.confidence:.0%})"
+                     ),
+                     data={
+                         "sufficient": assessment.sufficient,
+                         "confidence": assessment.confidence,
+                         "mechanism_score": assessment.details.mechanism_score,
+                         "clinical_score": assessment.details.clinical_evidence_score,
+                     },
+                     iteration=iteration,
+                 )
+
+                 # Record this iteration in history
+                 self.history.append(
+                     {
+                         "iteration": iteration,
+                         "queries": current_queries,
+                         "evidence_count": len(all_evidence),
+                         "assessment": assessment.model_dump(),
+                     }
+                 )
+
+                 # === DECISION PHASE ===
+                 if assessment.sufficient and assessment.recommendation == "synthesize":
+                     yield AgentEvent(
+                         type="synthesizing",
+                         message="Evidence sufficient! Preparing synthesis...",
+                         iteration=iteration,
+                     )
+
+                     # Generate final response
+                     final_response = self._generate_synthesis(query, all_evidence, assessment)
+
+                     yield AgentEvent(
+                         type="complete",
+                         message=final_response,
+                         data={
+                             "evidence_count": len(all_evidence),
+                             "iterations": iteration,
+                             "drug_candidates": assessment.details.drug_candidates,
+                             "key_findings": assessment.details.key_findings,
+                         },
+                         iteration=iteration,
+                     )
+                     return
+
+                 else:
+                     # Need more evidence - prepare next queries
+                     current_queries = assessment.next_search_queries or [
+                         f"{query} mechanism of action",
+                         f"{query} clinical evidence",
+                     ]
+
+                     yield AgentEvent(
+                         type="looping",
+                         message=(
+                             f"Need more evidence. "
+                             f"Next searches: {', '.join(current_queries[:2])}..."
+                         ),
+                         data={"next_queries": current_queries},
+                         iteration=iteration,
+                     )
+
+             except Exception as e:
+                 logger.error("Judge phase failed", error=str(e))
+                 yield AgentEvent(
+                     type="error",
+                     message=f"Assessment failed: {e!s}",
+                     iteration=iteration,
+                 )
+                 continue
+
+         # Max iterations reached
+         yield AgentEvent(
+             type="complete",
+             message=self._generate_partial_synthesis(query, all_evidence),
+             data={
+                 "evidence_count": len(all_evidence),
+                 "iterations": iteration,
+                 "max_reached": True,
+             },
+             iteration=iteration,
+         )
+
+     def _generate_synthesis(
+         self,
+         query: str,
+         evidence: list[Evidence],
+         assessment: JudgeAssessment,
+     ) -> str:
+         """
+         Generate the final synthesis response.
+
+         Args:
+             query: The original question
+             evidence: All collected evidence
+             assessment: The final assessment
+
+         Returns:
+             Formatted synthesis as markdown
+         """
+         drug_list = (
+             "\n".join([f"- **{d}**" for d in assessment.details.drug_candidates])
+             or "- No specific candidates identified"
+         )
+         findings_list = (
+             "\n".join([f"- {f}" for f in assessment.details.key_findings]) or "- See evidence below"
+         )
+
+         citations = "\n".join(
+             [
+                 f"{i+1}. [{e.citation.title}]({e.citation.url}) "
+                 f"({e.citation.source.upper()}, {e.citation.date})"
+                 for i, e in enumerate(evidence[:10])  # Limit to 10 citations
+             ]
+         )
+
+         return f"""## Drug Repurposing Analysis
+
+ ### Question
+ {query}
+
+ ### Drug Candidates
+ {drug_list}
+
+ ### Key Findings
+ {findings_list}
+
+ ### Assessment
+ - **Mechanism Score**: {assessment.details.mechanism_score}/10
+ - **Clinical Evidence Score**: {assessment.details.clinical_evidence_score}/10
+ - **Confidence**: {assessment.confidence:.0%}
+
+ ### Reasoning
+ {assessment.reasoning}
+
+ ### Citations ({len(evidence)} sources)
+ {citations}
+
+ ---
+ *Analysis based on {len(evidence)} sources across {len(self.history)} iterations.*
+ """
+
+     def _generate_partial_synthesis(
+         self,
+         query: str,
+         evidence: list[Evidence],
+     ) -> str:
+         """
+         Generate a partial synthesis when max iterations reached.
+
+         Args:
+             query: The original question
+             evidence: All collected evidence
+
+         Returns:
+             Formatted partial synthesis as markdown
+         """
+         citations = "\n".join(
+             [
+                 f"{i+1}. [{e.citation.title}]({e.citation.url}) ({e.citation.source.upper()})"
+                 for i, e in enumerate(evidence[:10])
+             ]
+         )
+
+         return f"""## Partial Analysis (Max Iterations Reached)
+
+ ### Question
+ {query}
+
+ ### Status
+ Maximum search iterations reached. The evidence gathered may be incomplete.
+
+ ### Evidence Collected
+ Found {len(evidence)} sources. Consider refining your query for more specific results.
+
+ ### Citations
+ {citations}
+
+ ---
+ *Consider searching with more specific terms or drug names.*
+ """
src/utils/models.py CHANGED
@@ -1,6 +1,7 @@
  """Data models for the Search feature."""

- from typing import ClassVar, Literal
+ from datetime import datetime
+ from typing import Any, ClassVar, Literal

  from pydantic import BaseModel, Field

@@ -90,3 +91,47 @@ class JudgeAssessment(BaseModel):
      reasoning: str = Field(
          ..., min_length=20, description="Overall reasoning for the recommendation"
      )
+
+
+ class AgentEvent(BaseModel):
+     """Event emitted by the orchestrator for UI streaming."""
+
+     type: Literal[
+         "started",
+         "searching",
+         "search_complete",
+         "judging",
+         "judge_complete",
+         "looping",
+         "synthesizing",
+         "complete",
+         "error",
+     ]
+     message: str
+     data: Any = None
+     timestamp: datetime = Field(default_factory=datetime.now)
+     iteration: int = 0
+
+     def to_markdown(self) -> str:
+         """Format event as markdown for chat display."""
+         icons = {
+             "started": "πŸš€",
+             "searching": "πŸ”",
+             "search_complete": "πŸ“š",
+             "judging": "🧠",
+             "judge_complete": "βœ…",
+             "looping": "πŸ”„",
+             "synthesizing": "πŸ“",
+             "complete": "πŸŽ‰",
+             "error": "❌",
+         }
+         icon = icons.get(self.type, "β€’")
+         return f"{icon} **{self.type.upper()}**: {self.message}"
+
+
+ class OrchestratorConfig(BaseModel):
+     """Configuration for the orchestrator."""
+
+     max_iterations: int = Field(default=5, ge=1, le=10)
+     max_results_per_tool: int = Field(default=10, ge=1, le=50)
+     search_timeout: float = Field(default=30.0, ge=5.0, le=120.0)
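For reference, `AgentEvent.to_markdown` renders like this (expected output shown as a comment):

```python
from src.utils.models import AgentEvent

event = AgentEvent(
    type="search_complete",
    message="Found 4 new sources (9 total)",
    iteration=2,
)
print(event.to_markdown())
# πŸ“š **SEARCH_COMPLETE**: Found 4 new sources (9 total)
```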
tests/unit/test_orchestrator.py ADDED
@@ -0,0 +1,285 @@
+ """Unit tests for Orchestrator."""
+
+ from unittest.mock import AsyncMock
+
+ import pytest
+
+ from src.orchestrator import Orchestrator
+ from src.utils.models import (
+     AgentEvent,
+     AssessmentDetails,
+     Citation,
+     Evidence,
+     JudgeAssessment,
+     OrchestratorConfig,
+     SearchResult,
+ )
+
+
+ class TestOrchestrator:
+     """Tests for Orchestrator."""
+
+     @pytest.fixture
+     def mock_search_handler(self):
+         """Create a mock search handler."""
+         handler = AsyncMock()
+         handler.execute = AsyncMock(
+             return_value=SearchResult(
+                 query="test",
+                 evidence=[
+                     Evidence(
+                         content="Test content",
+                         citation=Citation(
+                             source="pubmed",
+                             title="Test Title",
+                             url="https://pubmed.ncbi.nlm.nih.gov/12345/",
+                             date="2024-01-01",
+                         ),
+                     ),
+                 ],
+                 sources_searched=["pubmed"],
+                 total_found=1,
+                 errors=[],
+             )
+         )
+         return handler
+
+     @pytest.fixture
+     def mock_judge_sufficient(self):
+         """Create a mock judge that returns sufficient."""
+         handler = AsyncMock()
+         handler.assess = AsyncMock(
+             return_value=JudgeAssessment(
+                 details=AssessmentDetails(
+                     mechanism_score=8,
+                     mechanism_reasoning="Good mechanism",
+                     clinical_evidence_score=7,
+                     clinical_reasoning="Good clinical",
+                     drug_candidates=["Drug A"],
+                     key_findings=["Finding 1"],
+                 ),
+                 sufficient=True,
+                 confidence=0.85,
+                 recommendation="synthesize",
+                 next_search_queries=[],
+                 reasoning="Evidence is sufficient",
+             )
+         )
+         return handler
+
+     @pytest.fixture
+     def mock_judge_insufficient(self):
+         """Create a mock judge that returns insufficient."""
+         handler = AsyncMock()
+         handler.assess = AsyncMock(
+             return_value=JudgeAssessment(
+                 details=AssessmentDetails(
+                     mechanism_score=4,
+                     mechanism_reasoning="Weak mechanism",
+                     clinical_evidence_score=3,
+                     clinical_reasoning="Weak clinical",
+                     drug_candidates=[],
+                     key_findings=[],
+                 ),
+                 sufficient=False,
+                 confidence=0.3,
+                 recommendation="continue",
+                 next_search_queries=["more specific query"],
+                 reasoning="Need more evidence to make a decision.",
+             )
+         )
+         return handler
+
+     @pytest.mark.asyncio
+     async def test_orchestrator_completes_with_sufficient_evidence(
+         self,
+         mock_search_handler,
+         mock_judge_sufficient,
+     ):
+         """Orchestrator should complete when evidence is sufficient."""
+         config = OrchestratorConfig(max_iterations=5)
+         orchestrator = Orchestrator(
+             search_handler=mock_search_handler,
+             judge_handler=mock_judge_sufficient,
+             config=config,
+         )
+
+         events = []
+         async for event in orchestrator.run("test query"):
+             events.append(event)
+
+         # Should have started, searched, judged, and completed
+         event_types = [e.type for e in events]
+         assert "started" in event_types
+         assert "searching" in event_types
+         assert "search_complete" in event_types
+         assert "judging" in event_types
+         assert "judge_complete" in event_types
+         assert "complete" in event_types
+
+         # Should only have 1 iteration
+         complete_event = next(e for e in events if e.type == "complete")
+         assert complete_event.iteration == 1
+
+     @pytest.mark.asyncio
+     async def test_orchestrator_loops_when_insufficient(
+         self,
+         mock_search_handler,
+         mock_judge_insufficient,
+     ):
+         """Orchestrator should loop when evidence is insufficient."""
+         config = OrchestratorConfig(max_iterations=3)
+         orchestrator = Orchestrator(
+             search_handler=mock_search_handler,
+             judge_handler=mock_judge_insufficient,
+             config=config,
+         )
+
+         events = []
+         async for event in orchestrator.run("test query"):
+             events.append(event)
+
+         # Should have looping events
+         event_types = [e.type for e in events]
+         assert event_types.count("looping") >= 2  # noqa: PLR2004
+
+         # Should hit max iterations
+         complete_event = next(e for e in events if e.type == "complete")
+         assert complete_event.data.get("max_reached") is True
+
+     @pytest.mark.asyncio
+     async def test_orchestrator_respects_max_iterations(
+         self,
+         mock_search_handler,
+         mock_judge_insufficient,
+     ):
+         """Orchestrator should stop at max_iterations."""
+         config = OrchestratorConfig(max_iterations=2)
+         orchestrator = Orchestrator(
+             search_handler=mock_search_handler,
+             judge_handler=mock_judge_insufficient,
+             config=config,
+         )
+
+         events = []
+         async for event in orchestrator.run("test query"):
+             events.append(event)
+
+         # Should have exactly 2 iterations
+         max_iteration = max(e.iteration for e in events)
+         assert max_iteration == 2  # noqa: PLR2004
+
+     @pytest.mark.asyncio
+     async def test_orchestrator_handles_search_error(self):
+         """Orchestrator should handle search errors gracefully."""
+         mock_search = AsyncMock()
+         mock_search.execute = AsyncMock(side_effect=Exception("Search failed"))
+
+         mock_judge = AsyncMock()
+         mock_judge.assess = AsyncMock(
+             return_value=JudgeAssessment(
+                 details=AssessmentDetails(
+                     mechanism_score=0,
+                     mechanism_reasoning="Not applicable here.",
+                     clinical_evidence_score=0,
+                     clinical_reasoning="Not applicable here.",
+                     drug_candidates=[],
+                     key_findings=[],
+                 ),
+                 sufficient=False,
+                 confidence=0.0,
+                 recommendation="continue",
+                 next_search_queries=["retry query"],
+                 reasoning="Search failed, retrying...",
+             )
+         )
+
+         config = OrchestratorConfig(max_iterations=2)
+         orchestrator = Orchestrator(
+             search_handler=mock_search,
+             judge_handler=mock_judge,
+             config=config,
+         )
+
+         events = []
+         async for event in orchestrator.run("test query"):
+             events.append(event)
+
+         # Should recover and loop despite errors
+         event_types = [e.type for e in events]
+         assert "error" not in event_types
+         assert "looping" in event_types
+
+     @pytest.mark.asyncio
+     async def test_orchestrator_deduplicates_evidence(self, mock_judge_insufficient):
+         """Orchestrator should deduplicate evidence by URL."""
+         # Search returns same evidence each time
+         duplicate_evidence = Evidence(
+             content="Duplicate content",
+             citation=Citation(
+                 source="pubmed",
+                 title="Same Title",
+                 url="https://pubmed.ncbi.nlm.nih.gov/12345/",  # Same URL
+                 date="2024-01-01",
+             ),
+         )
+
+         mock_search = AsyncMock()
+         mock_search.execute = AsyncMock(
+             return_value=SearchResult(
+                 query="test",
+                 evidence=[duplicate_evidence],
+                 sources_searched=["pubmed"],
+                 total_found=1,
+                 errors=[],
+             )
+         )
+
+         config = OrchestratorConfig(max_iterations=2)
+         orchestrator = Orchestrator(
+             search_handler=mock_search,
+             judge_handler=mock_judge_insufficient,
+             config=config,
+         )
+
+         events = []
+         async for event in orchestrator.run("test query"):
+             events.append(event)
+
+         # Second search_complete should show 0 new evidence
+         search_complete_events = [e for e in events if e.type == "search_complete"]
+         assert len(search_complete_events) == 2  # noqa: PLR2004
+
+         # First iteration should have 1 new
+         assert search_complete_events[0].data["new_count"] == 1
+
+         # Second iteration should have 0 new (duplicate)
+         assert search_complete_events[1].data["new_count"] == 0
+
+
+ class TestAgentEvent:
+     """Tests for AgentEvent."""
+
+     def test_to_markdown(self):
+         """AgentEvent should format to markdown correctly."""
+         event = AgentEvent(
+             type="searching",
+             message="Searching for: metformin alzheimer",
+             iteration=1,
+         )
+
+         md = event.to_markdown()
+         assert "πŸ”" in md
+         assert "SEARCHING" in md
+         assert "metformin alzheimer" in md
+
+     def test_complete_event_icon(self):
+         """Complete event should have celebration icon."""
+         event = AgentEvent(
+             type="complete",
+             message="Done!",
+             iteration=3,
+         )
+
+         md = event.to_markdown()
+         assert "πŸŽ‰" in md
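All handlers in these tests are `AsyncMock`s, so the suite runs without network access or API keys, e.g.:

```bash
uv run pytest tests/unit/test_orchestrator.py -v
```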