# Codette-Reasoning/tests/test_phase7_executive_controller.py
# Jonathan Harrison
# Full Codette codebase sync — transparency release
# 74f2af5
"""Tests for Phase 7 Executive Controller
Validates:
1. Routing decisions for SIMPLE/MEDIUM/COMPLEX queries
2. Component activation correctness
3. Transparency metadata generation
4. Latency and correctness estimates
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from reasoning_forge.query_classifier import QueryComplexity
from reasoning_forge.executive_controller import (
ExecutiveController,
ExecutiveControllerWithLearning,
ComponentDecision,
)
def test_simple_routing():
    """Verify that a SIMPLE query is routed around the heavy components.

    Routes one factual question at SIMPLE complexity and checks that the
    decision keeps debate, semantic tension and the preflight predictor
    switched off, and that the latency, correctness and compute-cost
    estimates fall inside the bounds asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query("What is the speed of light?", QueryComplexity.SIMPLE)

    assert decision.query_complexity == QueryComplexity.SIMPLE

    # Flags are checked by truthiness rather than compared to False with `==`.
    activation = decision.component_activation
    assert not activation['debate']
    assert not activation['semantic_tension']
    assert not activation['preflight_predictor']

    assert decision.estimated_latency_ms < 200  # Fast
    assert decision.estimated_correctness > 0.90
    assert decision.estimated_compute_cost < 10  # Low cost

    print("[OK] SIMPLE routing correct")
def test_medium_routing():
    """Verify that a MEDIUM query activates a selective set of components.

    Checks that debate, semantic tension and specialization tracking are
    on, the preflight predictor is off, exactly one debate round is
    configured, and the latency, correctness and compute-cost estimates
    fall inside the bounds asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query(
        "How does quantum mechanics relate to consciousness?",
        QueryComplexity.MEDIUM,
    )

    assert decision.query_complexity == QueryComplexity.MEDIUM

    # Flags are checked by truthiness rather than compared to True/False.
    activation = decision.component_activation
    assert activation['debate']
    assert activation['semantic_tension']
    assert activation['specialization_tracking']
    assert not activation['preflight_predictor']  # Skipped

    assert decision.component_config.get('debate_rounds') == 1
    assert 800 < decision.estimated_latency_ms < 1000  # Medium latency
    assert decision.estimated_correctness > 0.70
    assert 20 < decision.estimated_compute_cost < 30

    print("[OK] MEDIUM routing correct")
def test_complex_routing():
    """Verify that a COMPLEX query activates the full set of components.

    Checks that debate, semantic tension, the preflight predictor and
    specialization tracking are all on, three debate rounds are
    configured, and the latency and compute-cost estimates fall inside the
    bounds asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query(
        "Can machines be truly conscious?",
        QueryComplexity.COMPLEX,
    )

    assert decision.query_complexity == QueryComplexity.COMPLEX

    # Flags are checked by truthiness rather than compared to True with `==`.
    activation = decision.component_activation
    assert activation['debate']
    assert activation['semantic_tension']
    assert activation['preflight_predictor']
    assert activation['specialization_tracking']

    assert decision.component_config.get('debate_rounds') == 3
    assert decision.estimated_latency_ms > 2000  # Slow but thorough
    assert 40 < decision.estimated_compute_cost < 60

    print("[OK] COMPLEX routing correct")
def test_route_transparency_metadata():
    """Verify the transparency metadata built for a routed query.

    Routes a SIMPLE query, then calls
    ``ExecutiveController.create_route_metadata`` with a measured latency
    of 145 ms, zero conflicts and a gamma of 0.95, and checks the
    ``phase7_routing`` entry of the result: complexity label, component
    flags, latency analysis and metrics.
    """
    ctrl = ExecutiveController()
    decision = ctrl.route_query("What is entropy?", QueryComplexity.SIMPLE)

    # Simulate execution with a measured latency.
    metadata = ExecutiveController.create_route_metadata(
        decision=decision,
        actual_latency_ms=145,  # Expected to be slightly faster than estimated
        actual_conflicts=0,
        gamma=0.95,
    )

    assert 'phase7_routing' in metadata
    routing = metadata['phase7_routing']

    assert routing['query_complexity'] == 'simple'
    assert 'components_activated' in routing
    # Flags are checked by truthiness rather than compared to False with `==`.
    assert not routing['components_activated']['debate']
    assert not routing['components_activated']['semantic_tension']

    # Check latency analysis
    latency = routing['latency_analysis']
    assert latency['estimated_ms'] == decision.estimated_latency_ms
    assert latency['actual_ms'] == 145
    assert latency['savings_ms'] > 0  # Faster than estimated

    # Check metrics
    assert routing['metrics']['conflicts_detected'] == 0
    assert routing['metrics']['gamma_coherence'] == 0.95

    print("[OK] Transparency metadata correct")
def test_routing_statistics():
    """Check that the controller keeps statistics about routed queries.

    Routes four queries (two SIMPLE, one MEDIUM, one COMPLEX) and expects
    the statistics to report a total of four and to include a
    ``component_activation_counts`` entry.
    """
    controller = ExecutiveController()

    # The queries to simulate, in the order they are routed.
    workload = (
        ("What is light?", QueryComplexity.SIMPLE),
        ("What is light?", QueryComplexity.SIMPLE),
        ("How does light work?", QueryComplexity.MEDIUM),
        ("Can light be conscious?", QueryComplexity.COMPLEX),
    )
    for question, level in workload:
        controller.route_query(question, level)

    summary = controller.get_routing_statistics()

    assert summary['total_queries_routed'] == 4
    assert 'component_activation_counts' in summary

    print(f" Stats: {summary}")
    print("[OK] Routing statistics tracked")
def test_component_activation_counts():
    """Check the per-component activation tallies after a mixed workload.

    Routes three SIMPLE, two MEDIUM and one COMPLEX query, then expects
    debate, semantic tension and specialization tracking to each report a
    count of 3, i.e. the MEDIUM (2) + COMPLEX (1) queries.
    """
    controller = ExecutiveController()

    # (query text, complexity, number of times it is routed)
    plan = (
        ("What?", QueryComplexity.SIMPLE, 3),
        ("How?", QueryComplexity.MEDIUM, 2),
        ("Why?", QueryComplexity.COMPLEX, 1),
    )
    for question, level, repeats in plan:
        for _ in range(repeats):
            controller.route_query(question, level)

    tallies = controller.get_routing_statistics()['component_activation_counts']

    # A component missing from the tallies is treated as a count of zero.
    for component in ('debate', 'semantic_tension', 'specialization_tracking'):
        assert tallies.get(component, 0) == 3

    print(f" Component activation counts: {tallies}")
    print("[OK] Component activation counts correct")
def test_learning_routing():
    """Check the learning controller's routing and route-confidence lookup.

    Routes one SIMPLE query, then assigns ``learned_routes`` directly
    (standing in for what ``update_routes_from_history`` would produce) and
    checks that ``get_route_confidence`` returns the stored SIMPLE value.
    """
    learner = ExecutiveControllerWithLearning(verbose=False)  # Quieter for test

    # First route happens before any learned patterns are set.
    first_decision = learner.route_query("What's the speed?", QueryComplexity.SIMPLE)
    assert first_decision.query_complexity == QueryComplexity.SIMPLE

    # Keys are lowercase to match QueryComplexity.value.
    learner.learned_routes = {
        'simple': 0.95,
        'medium': 0.80,
        'complex': 0.85,
    }

    # Every complexity level must be present after the assignment.
    for level_name in ('simple', 'medium', 'complex'):
        assert level_name in learner.learned_routes

    # Simple routes should carry at least as much confidence as medium ones.
    assert learner.learned_routes['simple'] >= learner.learned_routes['medium']

    simple_confidence = learner.get_route_confidence(QueryComplexity.SIMPLE)
    assert simple_confidence == 0.95, f"Expected 0.95, got {simple_confidence}"

    print(f" Learned routes: {learner.learned_routes}")
    print("[OK] Learning router works")
def test_compute_cost_ranking():
    """Verify that estimated compute cost ranks SIMPLE < MEDIUM < COMPLEX.

    Routes one query at each complexity level on the same controller and
    compares the ``estimated_compute_cost`` of the three decisions.
    """
    ctrl = ExecutiveController()

    simple_decision = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_decision = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_decision = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    # Only the returned decisions are inspected, so the controller's
    # internal counters are left untouched.
    assert simple_decision.estimated_compute_cost < medium_decision.estimated_compute_cost
    assert medium_decision.estimated_compute_cost < complex_decision.estimated_compute_cost

    print(f" Cost ranking: {simple_decision.estimated_compute_cost} < "
          f"{medium_decision.estimated_compute_cost} < "
          f"{complex_decision.estimated_compute_cost}")
    print("[OK] Compute cost ranking correct")
def test_latency_ranking():
    """Verify that estimated latency ranks SIMPLE < MEDIUM < COMPLEX.

    Routes one query at each complexity level on the same controller and
    compares the ``estimated_latency_ms`` of the three decisions.
    """
    ctrl = ExecutiveController()

    # Named *_decision so that no local shadows the builtin ``complex``.
    simple_decision = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_decision = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_decision = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    assert simple_decision.estimated_latency_ms < medium_decision.estimated_latency_ms
    assert medium_decision.estimated_latency_ms < complex_decision.estimated_latency_ms

    print(f" Latency ranking: {simple_decision.estimated_latency_ms}ms < "
          f"{medium_decision.estimated_latency_ms}ms < "
          f"{complex_decision.estimated_latency_ms}ms")
    print("[OK] Latency ranking correct")
def test_component_decision_asdict():
    """Check that a routing decision can be flattened into a plain dict.

    Builds the dict by hand from the decision's attributes (the complexity
    is stored via its ``.value``) and checks the complexity label and that
    the reasoning text is not empty.
    """
    controller = ExecutiveController()
    routed = controller.route_query("Test query", QueryComplexity.SIMPLE)

    # The complexity enum is stored by value; other fields are copied as-is.
    snapshot = {'query_complexity': routed.query_complexity.value}
    copied_fields = (
        'component_activation',
        'reasoning',
        'estimated_latency_ms',
        'estimated_correctness',
        'estimated_compute_cost',
    )
    for field_name in copied_fields:
        snapshot[field_name] = getattr(routed, field_name)

    assert snapshot['query_complexity'] == 'simple'
    assert snapshot['reasoning'] != ""

    print("[OK] ComponentDecision serializable")
if __name__ == '__main__':
    # Script entry point: run every test in file order, framed by banners.
    banner = "=" * 70

    print("\n" + banner)
    print("Phase 7 Executive Controller Tests")
    print(banner + "\n")

    # A failing assertion aborts the run before the PASS banner is printed.
    for run_test in (
        test_simple_routing,
        test_medium_routing,
        test_complex_routing,
        test_route_transparency_metadata,
        test_routing_statistics,
        test_component_activation_counts,
        test_learning_routing,
        test_compute_cost_ranking,
        test_latency_ranking,
        test_component_decision_asdict,
    ):
        run_test()

    print("\n" + banner)
    print("PASS: All Phase 7 Executive Controller tests passed!")
    print(banner + "\n")