| """ |
| Comprehensive test suite for Level 2 features |
| """ |
|
|
| import asyncio |
| import sys |
| import os |
|
|
| |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
|
|
| from agent.core.level2_config import llm_config, level2_config, LLMConfig, Level2Config |
| from agent.core.semantic_cache import SemanticCache, semantic_cache |
| from agent.core.observability import RealTimeObservabilityEngine, observability, ExecutionEvent |
| from agent.core.contextual_memory import ContextualMemoryEngine, memory_engine, ExecutionResult |
| from agent.core.adaptive_reasoning import AdaptiveReasoningEngine, reasoning_engine |
| from agent.core.optimized_executor import OptimizedToolExecutor, tool_executor, ToolCall |
|
|
|
|
async def test_config():
    """Test Level 2 configuration.

    Prints the active LLM settings and Level 2 feature flags so a reviewer
    can eyeball them. Returns True on success for consistency with the
    other tests in this suite.
    """
    print("\n=== Testing Level 2 Configuration ===")

    # Core LLM settings.
    print(f"LLM Model: {llm_config.model}")
    print(f"Temperature: {llm_config.temperature}")
    print(f"Max Tokens: {llm_config.max_tokens}")

    # Level 2 feature flags.
    print(f"\nMulti-pass reasoning: {level2_config.enable_multi_pass_reasoning}")
    print(f"Semantic cache: {level2_config.enable_semantic_cache}")
    print(f"Parallel execution: {level2_config.enable_parallel_execution}")
    print(f"Auto retry: {level2_config.enable_auto_retry}")
    print(f"Max retries: {level2_config.max_tool_retries}")

    print("✅ Configuration test passed")
    # Fix: every sibling test returns True on success; run_all_tests only
    # checks `result is not False`, so adding this is backward-compatible
    # and makes the suite's pass/fail convention uniform.
    return True
|
|
|
|
async def test_semantic_cache():
    """Verify store, exact-hit, and miss behaviour of the semantic cache."""
    print("\n=== Testing Semantic Cache ===")

    sc = SemanticCache()

    # Store a known question/answer pair.
    question = "What is the capital of France?"
    answer = {"answer": "Paris", "confidence": 0.99}
    entry_key = await sc.store(question, answer, {"type": "qa"})
    print(f"Stored with key: {entry_key[:8]}...")

    # The identical query must produce a cache hit.
    hit = await sc.check(question)
    if not hit:
        print("❌ Cache miss (unexpected)")
        return False
    print(f"Cache hit! Similarity: {hit.similarity:.3f}")
    print(f"Result: {hit.result}")

    # An unrelated query must miss.
    unrelated = "What is the weather today?"
    if await sc.check(unrelated):
        print("❌ Cache hit (unexpected for different query)")
        return False
    print("✅ Cache miss as expected for different query")

    # Dump aggregate cache statistics.
    stats = sc.get_stats()
    print(f"\nCache stats: {stats}")

    print("✅ Semantic cache test passed")
    return True
|
|
|
|
async def test_observability():
    """Feed synthetic tool events to the observability engine and dump its views."""
    print("\n=== Testing Observability Engine ===")

    engine = RealTimeObservabilityEngine()

    # Ten completions for "web_search": the first eight succeed, the last
    # two fail, with durations ramping up from 1.0s in 0.1s steps.
    for step in range(10):
        evt = ExecutionEvent(
            event_type="tool_execution_complete",
            data={
                "tool": "web_search",
                "success": step < 8,
                "duration": 1.0 + step * 0.1,
                "cost": 0.05,
            },
        )
        anomaly = engine.track_execution(evt)
        if anomaly:
            print(f"Anomaly detected: {anomaly.tool_name} - {anomaly.deviation_percent:.1f}%")

    # Per-tool metrics view.
    metrics = engine.get_tool_metrics("web_search")
    print(f"\nTool metrics: {metrics}")

    # Aggregate summary view.
    summary = engine.get_summary()
    print(f"\nObservability summary: {summary}")

    # Ask for a predictive failure warning (may legitimately be falsy).
    warning = engine.predict_failure()
    if warning:
        print(f"Predictive warning: {warning.predicted_issue}")

    print("✅ Observability test passed")
    return True
|
|
|
|
async def test_contextual_memory():
    """Exercise context retrieval, learning, and user stats in the memory engine."""
    print("\n=== Testing Contextual Memory Engine ===")

    engine = ContextualMemoryEngine(storage_path="/tmp/test_memory")

    # Retrieve context for a fresh query/user pair.
    ctx = await engine.retrieve_context(
        query="Build a React app with TypeScript",
        user_id="test_user",
        max_tokens=1000,
    )

    print(f"Retrieved context:")
    print(f" - User memory: {ctx.user_memory is not None}")
    print(f" - Similar examples: {len(ctx.similar_examples)}")
    print(f" - Domain knowledge: {len(ctx.domain_knowledge)}")
    print(f" - Compressed size: {ctx.compressed_size} tokens")

    # Record a successful execution so the engine can learn from it.
    outcome = ExecutionResult(
        query="Build a React app",
        success=True,
        tools_used=["execute_code", "web_search"],
        reasoning=["Analyzed requirements", "Created plan"],
    )
    await engine.learn_from_execution("test_user", outcome)

    # Dump per-user statistics.
    user_stats = engine.get_user_stats("test_user")
    print(f"\nUser stats: {user_stats}")

    print("✅ Contextual memory test passed")
    return True
|
|
|
|
async def test_adaptive_reasoning():
    """Walk the reasoning engine through analysis, planning, and verification."""
    print("\n=== Testing Adaptive Reasoning Engine ===")

    reasoner = AdaptiveReasoningEngine()
    task = "Create a Python function to sort a list"

    # Phase 1: problem analysis.
    print("\nTesting problem analysis...")
    analysis = await reasoner.analyze_problem(task)
    print(f" Difficulty: {analysis.estimated_difficulty}")
    print(f" Tool calls: {analysis.estimated_tool_calls}")
    print(f" Domains: {analysis.domains}")

    # Phase 2: build an execution plan from that analysis.
    print("\nTesting execution plan creation...")
    plan = await reasoner.create_execution_plan(
        query=task,
        analysis=analysis,
        available_tools=["execute_code", "web_search"],
    )
    print(f" Steps: {len(plan.steps)}")
    print(f" Total cost: {plan.total_cost}")
    print(f" Estimated duration: {plan.estimated_duration}s")
    for planned in plan.steps:
        print(f" - Step {planned.step_number}: {planned.tool} ({planned.reasoning[:50]}...)")

    # Phase 3: verify a candidate solution against the original query.
    print("\nTesting solution verification...")
    verdict = await reasoner.verify_solution(
        query=task,
        result="def sort_list(lst): return sorted(lst)",
        steps_executed=["execute_code"],
    )
    print(f" Complete: {verdict.is_complete}")
    print(f" Quality score: {verdict.quality_score}")
    print(f" Improvements: {verdict.improvements}")

    print("✅ Adaptive reasoning test passed")
    return True
|
|
|
|
async def test_optimized_executor():
    """Test optimized tool executor.

    Runs two mock tool calls through the executor and prints each tool's
    outcome. Returns True on success.
    """
    print("\n=== Testing Optimized Tool Executor ===")

    executor = OptimizedToolExecutor()

    # Stand-in for a real tool backend: sleeps briefly to simulate work.
    # `params` is accepted (and ignored) to match the expected call shape.
    async def mock_execute(tool_name: str, params: dict):
        await asyncio.sleep(0.1)
        return f"Result from {tool_name}"

    tools = [
        ToolCall(id="1", name="web_search", params={"query": "test"}),
        ToolCall(id="2", name="execute_code", params={"code": "print('hello')"}),
    ]

    print("\nExecuting tools with optimization...")
    results = await executor.execute_with_optimization(
        tools=tools,
        execute_fn=mock_execute
    )

    # Fix: the dict key was bound but never used (only the values are
    # printed), so iterate .values() instead of .items().
    for result in results.values():
        print(f" Tool {result.tool_name}: success={result.success}, time={result.execution_time:.3f}s")

    print("✅ Optimized executor test passed")
    return True
|
|
|
|
async def run_all_tests():
    """Run every Level 2 feature test and report an overall pass/fail.

    A test counts as passed unless it returns False or raises; returns
    True only when no test failed.
    """
    banner = "=" * 60
    print(banner)
    print("Level 2 Features Test Suite")
    print(banner)

    suite = [
        ("Configuration", test_config),
        ("Semantic Cache", test_semantic_cache),
        ("Observability", test_observability),
        ("Contextual Memory", test_contextual_memory),
        ("Adaptive Reasoning", test_adaptive_reasoning),
        ("Optimized Executor", test_optimized_executor),
    ]

    passed = 0
    failed = 0

    for label, test_coro in suite:
        try:
            outcome = await test_coro()
        except Exception as exc:
            # A raised exception counts as a failure; show the traceback
            # so the offending test is easy to debug.
            print(f"❌ {label} test failed with error: {exc}")
            import traceback
            traceback.print_exc()
            failed += 1
        else:
            # Tests signal failure by returning False; None/True both pass.
            if outcome is False:
                failed += 1
            else:
                passed += 1

    print("\n" + banner)
    print(f"Test Results: {passed} passed, {failed} failed")
    print(banner)

    return failed == 0
|
|
|
|
# Entry point: run the async suite and translate its boolean result into
# a conventional process exit code (0 = all passed, 1 = at least one failure).
if __name__ == "__main__":
    success = asyncio.run(run_all_tests())
    sys.exit(0 if success else 1)
|
|