""" |
|
|
Integration tests for ai-me agent. |
|
|
Tests the complete setup including vectorstore, agent configuration, and agent responses. |
|
|
""" |
|
|
import pytest |
|
|
import pytest_asyncio |
|
|
import re |
|
|
import sys |
|
|
import os |
|
|
import logging |
|
|
from datetime import datetime |
|
|
from unittest.mock import AsyncMock, patch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
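# Deterministic model settings and test-document paths are exported here, before the
# app modules below are imported, so that Config and DataManager can pick them up
# from the environment.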
os.environ["TEMPERATURE"] = "0" |
|
|
os.environ["SEED"] = "42" |
|
|
|
|
|
|
|
|
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) |
|
|
test_data_dir = os.path.join(project_root, "tests", "data") |
|
|
os.environ["DOC_ROOT"] = test_data_dir |
|
|
os.environ["LOCAL_DOCS"] = "**/*.md" |
|
|
|
|
|
from config import setup_logger, Config
from agent import AIMeAgent
from data import DataManager, DataManagerConfig

logger = setup_logger(__name__)

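# Module-level caches shared across tests; populated lazily on first use so the
# config and vectorstore are only built once per test run.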
_config = None
_vectorstore = None
_data_manager = None


def _get_shared_config():
    """Lazy initialization of shared config."""
    global _config
    if _config is None:
        _config = Config()
        logger.info(f"Initialized shared config: {_config.bot_full_name}")
    return _config


def _get_shared_vectorstore():
    """Lazy initialization of shared vectorstore."""
    global _vectorstore, _data_manager
    if _vectorstore is None:
        logger.info("Initializing shared vectorstore (first test)...")
        test_data_dir = os.path.join(project_root, "tests", "data")
        _data_config = DataManagerConfig(doc_root=test_data_dir)
        _data_manager = DataManager(config=_data_config)
        _vectorstore = _data_manager.setup_vectorstore()
        logger.info(f"Shared vectorstore ready: {_vectorstore._collection.count()} documents")
    return _vectorstore


@pytest_asyncio.fixture(scope="function")
async def ai_me_agent():
    """
    Setup fixture for the ai-me agent with vectorstore and MCP servers.

    CRITICAL: Function-scoped fixture prevents hanging/blocking issues.
    Each test gets its own agent instance with proper cleanup.

    This fixture:
    - Reuses the shared config and vectorstore (lazy-initialized on first use)
    - Creates the agent WITH real subprocess MCP servers (GitHub, Time, Memory)
    - Yields the agent to the test
    - Cleans up the MCP servers after the test completes
    """
    config = _get_shared_config()
    vectorstore = _get_shared_vectorstore()

    aime_agent = AIMeAgent(
        bot_full_name=config.bot_full_name,
        model=config.model,
        vectorstore=vectorstore,
        github_token=config.github_token,
        session_id="test-session"
    )

    logger.info("Creating ai-me agent with MCP servers...")
    assert aime_agent.session_id is not None, "session_id should be set"
    await aime_agent.create_ai_me_agent(
        mcp_params=[
            aime_agent.mcp_github_params,
            aime_agent.mcp_time_params,
            aime_agent.get_mcp_memory_params(aime_agent.session_id),
        ]
    )
    logger.info("Agent created successfully with MCP servers")
    logger.info(f"Temperature set to {config.temperature}")
    logger.info(f"Seed set to {config.seed}")

    yield aime_agent

    logger.info("Cleaning up MCP servers after test...")
    await aime_agent.cleanup()
    logger.info("Cleanup complete")


@pytest.mark.asyncio
async def test_github_documents_load():
    """Tests FR-002: GitHub document loading with source metadata."""
    config = Config()

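    # doc_load_local=[] is intended to skip local document loading so that only the
    # GitHub repository is ingested into the vectorstore for this test.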
    github_config = DataManagerConfig(doc_load_local=[])
    dm = DataManager(config=github_config)
    vs = dm.setup_vectorstore(github_repos=["byoung/ai-me"])

    agent = AIMeAgent(
        bot_full_name=config.bot_full_name,
        model=config.model,
        vectorstore=vs,
        github_token=config.github_token,
        session_id="test-session"
    )
    await agent.create_ai_me_agent()

    response = await agent.run("Do you have python experience?")

    assert "yes" in response.lower(), (
        f"Expected 'yes' in response but got: {response}"
    )


@pytest.mark.asyncio
async def test_rear_knowledge_contains_it245(ai_me_agent):
    """Tests REQ-001: Knowledge base retrieval of personal documentation."""
    response = await ai_me_agent.run("What is IT-245?")

    assert "it-245" in response.lower(), (
        f"Expected 'IT-245' in response but got: {response}"
    )
    logger.info("✓ IT-245 found in response")


@pytest.mark.asyncio
async def test_github_commits_contains_shas(ai_me_agent):
    """Tests REQ-002: MCP GitHub integration - retrieve commit history."""
    response = await ai_me_agent.run("What are some recent commits I've made?")

    assert response, "Response is empty"
    assert len(response) > 10, "Response is too short"
    logger.info("✓ Response contains commit information")


@pytest.mark.asyncio
async def test_unknown_person_contains_negative_response(ai_me_agent):
    """Tests REQ-003: Graceful handling of out-of-scope requests."""
    response = await ai_me_agent.run("Do you know Slartibartfast?")

    assert response, "Response is empty"
    assert (
        "don't know" in response.lower()
        or "not familiar" in response.lower()
        or "no information" in response.lower()
        or "don't have any information" in response.lower()
    ), f"Response doesn't indicate lack of knowledge: {response}"
    logger.info("✓ Test passed - correctly handled out-of-scope query")


@pytest.mark.asyncio
async def test_carol_knowledge_contains_product(ai_me_agent):
    """Tests FR-002, FR-003: Verify asking about Carol returns 'product'."""
    response_raw = await ai_me_agent.run("Do you know Carol?")
    response = response_raw.lower()

    assert "product" in response, (
        f"Expected 'product' in response but got: {response}"
    )
    logger.info("✓ Test passed: Response contains 'product'")


@pytest.mark.asyncio
async def test_mcp_time_server_returns_current_date(ai_me_agent):
    """Tests FR-009, NFR-001: Verify that the MCP time server returns the current date."""
    response = await ai_me_agent.run("What is today's date?")

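    # Accept either the ISO form (YYYY-MM-DD) or a written-out date such as
    # "<Month> <day>, <year>" appearing anywhere in the response.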
    now = datetime.now()
    expected_date, current_year, current_month, current_day = (
        now.strftime("%Y-%m-%d"),
        str(now.year),
        now.strftime("%B"),
        str(now.day),
    )

    has_date = (
        expected_date in response
        or (
            current_year in response
            and current_month in response
            and current_day in response
        )
    )

    assert has_date, (
        f"Expected response to contain current date "
        f"({expected_date} or {current_month} {current_day}, {current_year}) "
        f"but got: {response}"
    )
    logger.info("✓ Test passed: Response contains current date")


@pytest.mark.asyncio
async def test_mcp_memory_server_remembers_favorite_color(ai_me_agent):
    """Tests FR-013, NFR-002:
    Verify that the MCP memory server persists information across interactions.
    """
    await ai_me_agent.run("My favorite color is chartreuse.")
    response2 = await ai_me_agent.run("What's my favorite color?")

    assert "chartreuse" in response2.lower(), (
        f"Expected agent to remember favorite color 'chartreuse' "
        f"but got: {response2}"
    )
    msg = (
        "✓ Test passed: Agent remembered favorite color 'chartreuse' "
        "across interactions"
    )
    logger.info(msg)


@pytest.mark.asyncio
async def test_github_relative_links_converted_to_absolute_urls():
    """Tests FR-004: Document processing converts relative GitHub links to absolute URLs.

    Validates that when documents are loaded from GitHub with relative links
    (e.g., /resume.md), they are rewritten to full GitHub URLs
    (e.g., https://github.com/owner/repo/blob/main/resume.md).

    This is a unit-level test of the DataManager.process_documents() method.
    """
    from langchain_core.documents import Document

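    # A sample GitHub-sourced document containing relative markdown links; the
    # github_repo metadata is what the link rewriting is expected to key off.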
    sample_doc = Document(
        page_content=(
            "Check out [my resume](/resume.md) and "
            "[projects](/projects.md) for more info."
        ),
        metadata={
            "source": "github://byoung/ai-me/docs/about.md",
            "github_repo": "byoung/ai-me",
        },
    )

    assert sample_doc.metadata["github_repo"] == "byoung/ai-me", (
        "Sample doc metadata should have github_repo"
    )

    data_config = DataManagerConfig()
    data_manager = DataManager(config=data_config)
    processed_docs = data_manager.process_documents([sample_doc])

    assert len(processed_docs) == 1, "Expected 1 processed document"
    processed_content = processed_docs[0].page_content

    assert "https://github.com/byoung/ai-me/blob/main/resume.md" in processed_content, (
        f"Expected absolute GitHub URL for /resume.md in processed content, "
        f"but got: {processed_content}"
    )
    assert "https://github.com/byoung/ai-me/blob/main/projects.md" in processed_content, (
        f"Expected absolute GitHub URL for /projects.md in processed content, "
        f"but got: {processed_content}"
    )

    logger.info("✓ Test passed: Relative GitHub links converted to absolute URLs")
    logger.info("  Original: [my resume](/resume.md)")
    logger.info("  Converted: [my resume](https://github.com/byoung/ai-me/blob/main/resume.md)")


@pytest.mark.asyncio
async def test_agent_responses_cite_sources(ai_me_agent):
    """Tests FR-004, FR-011: Agent responses include source citations.

    Validates that agent responses include proper source attribution,
    which could be GitHub URLs, local paths, or explicit source references.
    """
    questions = [
        "What do you know about ReaR?",
        "Tell me about your experience in technology",
    ]

    for question in questions:
        logger.info(f"\n{'='*60}\nSource citation test: {question}\n{'='*60}")

        response = await ai_me_agent.run(question)

        has_source = (
            "https://github.com/" in response
            or ".md" in response
            or "source" in response.lower()
            or "documentation" in response.lower()
        )
        assert has_source, (
            f"Expected source attribution in response to '{question}' "
            f"but found none. Response: {response}"
        )

        min_length = 50
        assert len(response) > min_length, (
            f"Response to '{question}' was too short: {response}"
        )

        logger.info(f"✓ Source citation found for: {question[:40]}...")

    logger.info("\n✓ Test passed: Agent responses cite sources (FR-004, FR-011)")


@pytest.mark.asyncio
async def test_user_story_2_multi_topic_consistency(ai_me_agent):
    """
    Tests FR-001, FR-003, FR-005, NFR-002: User Story 2 - Multi-Topic Consistency

    Verify that the agent maintains a consistent first-person perspective
    across multiple conversation topics.

    This tests that the agent:
    - Uses first-person perspective (I, my, me) consistently
    - Maintains a professional tone across topic switches
    - Shows context awareness of different topics
    - Remains in character as the personified individual
    """
    topics = [
        ("What is your background in technology?", "background|experience|technology"),
        ("What programming languages are you skilled in?", "programming|language|skilled"),
    ]

    # Case-insensitive patterns for first-person pronouns and common contractions.
    first_person_patterns = [
        r"\bi\b", r"\bme\b", r"\bmy\b", r"\bmyself\b",
        r"\bi'm\b", r"\bi've\b", r"\bi'll\b",
    ]

    for question, _topic_keywords in topics:
        logger.info(f"\n{'='*60}\nMulti-topic test question: {question}\n{'='*60}")

        response = await ai_me_agent.run(question)

        first_person_found = any(
            re.search(pattern, response, re.IGNORECASE)
            for pattern in first_person_patterns
        )
        assert first_person_found, (
            f"Expected first-person perspective in response to '{question}' "
            f"but got: {response}"
        )

        min_length = 50
        assert len(response) > min_length, (
            f"Response to '{question}' was too short (likely not substantive): {response}"
        )

        logger.info(f"✓ First-person perspective maintained for: {question[:40]}...")
        logger.info(f"  Response preview: {response[:100]}...")

    logger.info("\n✓ Test passed: Consistent first-person perspective across multiple topics")


@pytest.mark.asyncio
async def test_tool_failure_error_messages_are_friendly(caplog, ai_me_agent):
    """
    Tests FR-012, NFR-003: Error Message Quality

    Verify that tool failures return user-friendly messages without Python tracebacks.

    This tests that the agent:
    - Returns human-readable error messages
    - Logs an error that can be reviewed in our dashboard/logs

    Uses mocking to simulate tool failures without adding test-specific code to agent.py.
    """
    logger.info(f"\n{'='*60}\nError Handling Test\n{'='*60}")

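    # Representative failures raised from the mocked Runner.run call in each scenario.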
    test_scenarios = [
        RuntimeError("Simulated tool timeout"),
        ValueError("Invalid tool parameters"),
    ]

    for error in test_scenarios:
        logger.info(f"\nTesting error scenario: {error.__class__.__name__}: {error}")

        caplog.clear()

        with patch('agent.Runner.run', new_callable=AsyncMock) as mock_run:
            mock_run.side_effect = error

            response = await ai_me_agent.run("Any user question")

        logger.info(f"Response: {response[:100]}...")

        assert "I encountered an unexpected error" in response, (
            f"Response must contain 'I encountered an unexpected error'. Got: {response}"
        )

        error_logs = [record for record in caplog.records if record.levelname == "ERROR"]
        assert len(error_logs) > 0, "Expected at least one ERROR log record from agent.py"

        agent_error_logged = any(
            "Unexpected error:" in record.message for record in error_logs
        )
        assert agent_error_logged, (
            f"Expected ERROR log with 'Unexpected error:' from agent.py. "
            f"Got: {[r.message for r in error_logs]}"
        )
        error_messages = [
            r.message for r in error_logs
            if "Unexpected error:" in r.message
        ]
        logger.info(
            f"✓ Error properly logged to logger: {error_messages}"
        )

    logger.info("\n✓ Test passed: Error messages are friendly (FR-012) + properly logged")


@pytest.mark.asyncio
async def test_logger_setup_format(caplog):
    """Tests NFR-003 (Structured Logging): Verify setup_logger creates structured logging.

    Tests that setup_logger() configures syslog-style format with JSON support for
    structured logging of user/agent interactions.

    This validates the logger configuration that our production app relies on
    for analytics and debugging.
    """
    root_logger = logging.getLogger()
    original_handlers = root_logger.handlers[:]
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)

    try:
        test_logger = setup_logger("test.structured_logging")

        assert test_logger.name == "test.structured_logging"

        assert len(root_logger.handlers) > 0, (
            "Root logger should have handlers after setup_logger"
        )

        has_stream_handler = any(
            isinstance(handler, logging.StreamHandler)
            for handler in root_logger.handlers
        )
        assert has_stream_handler, "Should have StreamHandler for console output"

        test_logger.info(
            '{"session_id": "test-session", "user_input": "test message"}'
        )

        logger.info(
            "✓ Test passed: Logger setup configures structured logging (NFR-003)"
        )
    finally:
        for handler in root_logger.handlers[:]:
            root_logger.removeHandler(handler)
        for handler in original_handlers:
            root_logger.addHandler(handler)


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])