Spaces:

MCP-1st-Birthday
/

DeepBoner

Running

App Files Files Community

DeepBoner / tests / unit / prompts / test_synthesis.py

VibecoderMcSwaggins

fix(SPEC_11): address CodeRabbit review feedback (#92)

89f1173 unverified 14 days ago

raw

history blame contribute delete

8.32 kB

	"""Tests for narrative synthesis prompts."""

	import pytest

	from src.prompts.synthesis import (
	FEW_SHOT_EXAMPLE,
	format_synthesis_prompt,
	get_synthesis_system_prompt,
	)


	@pytest.mark.unit
	class TestSynthesisSystemPrompt:
	    """Checks on the text returned by ``get_synthesis_system_prompt``.

	    Every test renders the system prompt and asserts that a specific phrase
	    or section heading occurs somewhere in it.
	    """

	    def test_system_prompt_emphasizes_prose(self) -> None:
	        """The prompt asks for prose paragraphs rather than bullet points."""
	        rendered = get_synthesis_system_prompt()
	        lowered = rendered.lower()
	        # The heading is matched with exact case; the "not bullet points"
	        # wording is matched case-insensitively.
	        assert "PROSE PARAGRAPHS" in rendered
	        assert "not bullet points" in lowered

	    def test_system_prompt_requires_executive_summary(self) -> None:
	        """The prompt names the executive summary and marks it REQUIRED."""
	        rendered = get_synthesis_system_prompt()
	        for phrase in ("Executive Summary", "REQUIRED"):
	            assert phrase in rendered

	    def test_system_prompt_requires_background(self) -> None:
	        """The prompt mentions a Background section."""
	        assert "Background" in get_synthesis_system_prompt()

	    def test_system_prompt_requires_evidence_synthesis(self) -> None:
	        """The prompt mentions Evidence Synthesis and Mechanism of Action."""
	        rendered = get_synthesis_system_prompt()
	        for heading in ("Evidence Synthesis", "Mechanism of Action"):
	            assert heading in rendered

	    def test_system_prompt_requires_recommendations(self) -> None:
	        """The prompt mentions a Recommendations section."""
	        assert "Recommendations" in get_synthesis_system_prompt()

	    def test_system_prompt_requires_limitations(self) -> None:
	        """The prompt mentions a Limitations section."""
	        assert "Limitations" in get_synthesis_system_prompt()

	    def test_system_prompt_warns_about_hallucination(self) -> None:
	        """The prompt tells the model never to hallucinate (any letter case)."""
	        lowered = get_synthesis_system_prompt().lower()
	        # A case-insensitive match also covers the "NEVER hallucinate"
	        # spelling, so a single lowercase check is sufficient.
	        assert "never hallucinate" in lowered

	    def test_system_prompt_includes_domain_name(self) -> None:
	        """Passing "sexual_health" puts "sexual health" into the prompt."""
	        lowered = get_synthesis_system_prompt("sexual_health").lower()
	        assert "sexual health" in lowered


	@pytest.mark.unit
	class TestFormatSynthesisPrompt:
	    """Checks on the user prompt produced by ``format_synthesis_prompt``.

	    All tests call the formatter through ``_render``, which supplies a
	    neutral baseline for every keyword argument so each test only spells out
	    the values it actually cares about.
	    """

	    @staticmethod
	    def _render(**overrides):
	        """Call ``format_synthesis_prompt`` with baseline arguments.

	        Keyword arguments given in ``overrides`` replace the matching baseline
	        entries. The baseline dict (and its empty lists) is rebuilt on every
	        call, so nothing is shared between tests.
	        """
	        arguments = {
	            "query": "test query",
	            "evidence_summary": "...",
	            "drug_candidates": [],
	            "key_findings": [],
	            "mechanism_score": 5,
	            "clinical_score": 5,
	            "confidence": 0.5,
	        }
	        arguments.update(overrides)
	        return format_synthesis_prompt(**arguments)

	    def test_includes_query(self) -> None:
	        """The research query text appears in the prompt."""
	        rendered = self._render(
	            query="testosterone libido",
	            evidence_summary="Study shows efficacy...",
	            drug_candidates=["Testosterone"],
	            key_findings=["Improved libido"],
	            mechanism_score=8,
	            clinical_score=7,
	            confidence=0.85,
	        )
	        assert "testosterone libido" in rendered

	    def test_includes_evidence_summary(self) -> None:
	        """The evidence summary text appears in the prompt."""
	        rendered = self._render(
	            evidence_summary="Study by Smith et al. shows significant results...",
	        )
	        assert "Study by Smith et al." in rendered

	    def test_includes_drug_candidates(self) -> None:
	        """Every supplied drug candidate name appears in the prompt."""
	        rendered = self._render(drug_candidates=["Testosterone", "Flibanserin"])
	        for name in ("Testosterone", "Flibanserin"):
	            assert name in rendered

	    def test_includes_key_findings(self) -> None:
	        """Every supplied key finding appears in the prompt."""
	        rendered = self._render(
	            key_findings=["Improved libido in postmenopausal women", "Safe profile"],
	        )
	        for finding in ("Improved libido in postmenopausal women", "Safe profile"):
	            assert finding in rendered

	    def test_includes_scores(self) -> None:
	        """Scores show up as "N/10" and the confidence as a percentage."""
	        rendered = self._render(
	            mechanism_score=8,
	            clinical_score=7,
	            confidence=0.85,
	        )
	        for fragment in ("8/10", "7/10", "85%"):
	            assert fragment in rendered

	    def test_handles_empty_candidates(self) -> None:
	        """An empty candidate list yields the "None identified" text."""
	        rendered = self._render(drug_candidates=[])
	        assert "None identified" in rendered

	    def test_handles_empty_findings(self) -> None:
	        """An empty findings list yields the "No specific findings" text."""
	        rendered = self._render(key_findings=[])
	        assert "No specific findings" in rendered

	    def test_includes_few_shot_example(self) -> None:
	        """The prompt contains text from the few-shot example."""
	        rendered = self._render()
	        assert "Alprostadil" in rendered  # From the few-shot example


	@pytest.mark.unit
	class TestFewShotExample:
	    """Checks on the ``FEW_SHOT_EXAMPLE`` constant.

	    The tests look for expected section headings and markers inside the
	    example text and compare its amount of prose with its amount of bullets.
	    """

	    def test_few_shot_is_mostly_narrative(self) -> None:
	        """Long paragraphs must be at least as numerous as bullet markers."""
	        # A "paragraph" is any blank-line-separated chunk longer than 100 chars.
	        prose_count = sum(
	            1 for chunk in FEW_SHOT_EXAMPLE.split("\n\n") if len(chunk) > 100
	        )
	        # Bullet markers: lines starting with "- " or with "1. ".
	        # NOTE(review): only items numbered "1." are matched, so later items
	        # of a numbered list ("2.", "3.", ...) do not add to the total.
	        # Confirm this is intended.
	        bullet_count = sum(
	            FEW_SHOT_EXAMPLE.count(marker) for marker in ("\n- ", "\n1. ")
	        )

	        assert (
	            prose_count >= bullet_count
	        ), "Few-shot example should be mostly narrative prose"

	    def test_few_shot_has_executive_summary(self) -> None:
	        """The example contains an "Executive Summary" heading."""
	        example = FEW_SHOT_EXAMPLE
	        assert "Executive Summary" in example

	    def test_few_shot_has_background(self) -> None:
	        """The example contains a "Background" heading."""
	        example = FEW_SHOT_EXAMPLE
	        assert "Background" in example

	    def test_few_shot_has_evidence_synthesis(self) -> None:
	        """The example contains Evidence Synthesis and Mechanism of Action."""
	        for heading in ("Evidence Synthesis", "Mechanism of Action"):
	            assert heading in FEW_SHOT_EXAMPLE

	    def test_few_shot_has_recommendations(self) -> None:
	        """The example contains a "Recommendations" heading."""
	        example = FEW_SHOT_EXAMPLE
	        assert "Recommendations" in example

	    def test_few_shot_has_limitations(self) -> None:
	        """The example contains a "Limitations" heading."""
	        example = FEW_SHOT_EXAMPLE
	        assert "Limitations" in example

	    def test_few_shot_has_references(self) -> None:
	        """The example has a References section with a PubMed link."""
	        for marker in ("References", "pubmed.ncbi.nlm.nih.gov"):
	            assert marker in FEW_SHOT_EXAMPLE

	    def test_few_shot_includes_statistics(self) -> None:
	        """The example shows percentages, a p-value and a confidence interval."""
	        example = FEW_SHOT_EXAMPLE
	        assert "%" in example  # Percentages
	        assert any(token in example for token in ("p<", "p="))  # P-values
	        assert "CI" in example  # Confidence intervals