"""
Integration tests for ai-me agent.
Tests the complete setup including vectorstore, agent configuration, and agent responses.
"""
import pytest
import pytest_asyncio
import re
import os
import logging
from datetime import datetime
from unittest.mock import AsyncMock, patch

# Something about these tests makes me feel yucky. Big, brittle, and slow. BBS?
# In the future we should run inference locally with docker-compose models.
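# A hypothetical sketch of how that could be wired up (illustrative names only;
# assumes the compose service exposes an OpenAI-compatible endpoint, which the
# standard OPENAI_BASE_URL environment variable can point at):
#
#   os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"  # local model service
#   os.environ["MODEL"] = "llama3.1:8b"                         # locally served model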

# Set temperature and seed for deterministic test results
os.environ["TEMPERATURE"] = "0"
os.environ["SEED"] = "42"

# Point our RAG to the tests/data directory
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
test_data_dir = os.path.join(project_root, "tests", "data")
os.environ["DOC_ROOT"] = test_data_dir
os.environ["LOCAL_DOCS"] = "**/*.md"

from config import setup_logger, Config
from agent import AIMeAgent
from data import DataManager, DataManagerConfig

logger = setup_logger(__name__)

# ============================================================================
# SHARED CACHING - Initialize on first use, then reuse
# ============================================================================

_config = None
_vectorstore = None
_data_manager = None


def _get_shared_config():
    """Lazy initialization of shared config."""
    global _config
    if _config is None:
        _config = Config()  # type: ignore
        logger.info(f"Initialized shared config: {_config.bot_full_name}")
    return _config


def _get_shared_vectorstore():
    """Lazy initialization of shared vectorstore."""
    global _vectorstore, _data_manager
    if _vectorstore is None:
        logger.info("Initializing shared vectorstore (first test)...")
        # Reuse the module-level test_data_dir computed above
        _data_config = DataManagerConfig(doc_root=test_data_dir)
        _data_manager = DataManager(config=_data_config)
        _vectorstore = _data_manager.setup_vectorstore()
        logger.info(f"Shared vectorstore ready: {_vectorstore._collection.count()} documents")
    return _vectorstore


@pytest_asyncio.fixture(scope="function")
async def ai_me_agent():
    """
    Setup fixture for ai-me agent with vectorstore and MCP servers.
    
    CRITICAL: Function-scoped fixture prevents hanging/blocking issues.
    Each test gets its own agent instance with proper cleanup.
    
    Reuses shared config and vectorstore (lazy-initialized on first use).
    
    This fixture:
    - Reuses shared config and vectorstore
    - Creates agent WITH real subprocess MCP servers (GitHub, Time, Memory)
    - Yields agent for test
    - Cleans up MCP servers after test completes
    """
    config = _get_shared_config()
    vectorstore = _get_shared_vectorstore()
    
    # Initialize agent config with shared vectorstore
    aime_agent = AIMeAgent(
        bot_full_name=config.bot_full_name,
        model=config.model,
        vectorstore=vectorstore,
        github_token=config.github_token,
        session_id="test-session"
    )
    
    # Create the agent WITH MCP servers enabled
    logger.info("Creating ai-me agent with MCP servers...")
    assert aime_agent.session_id is not None, "session_id should be set"
    await aime_agent.create_ai_me_agent(
        mcp_params=[
            aime_agent.mcp_github_params,
            aime_agent.mcp_time_params,
            aime_agent.get_mcp_memory_params(aime_agent.session_id),
        ]
    )
    logger.info("Agent created successfully with MCP servers")
    logger.info(f"Temperature set to {config.temperature}")
    logger.info(f"Seed set to {config.seed}")
    
    # Yield the agent for the test
    yield aime_agent
    
    # CRITICAL: Cleanup after test completes to prevent hanging
    logger.info("Cleaning up MCP servers after test...")
    await aime_agent.cleanup()
    logger.info("Cleanup complete")


@pytest.mark.asyncio
async def test_github_documents_load():
    """Tests FR-002: GitHub document loading with source metadata."""
    config = Config()  # type: ignore
    
    # Load GitHub documents directly
    github_config = DataManagerConfig(
        doc_load_local=[]
    )
    dm = DataManager(config=github_config)
    vs = dm.setup_vectorstore(github_repos=["byoung/ai-me"])
    
    agent = AIMeAgent(
        bot_full_name=config.bot_full_name,
        model=config.model,
        vectorstore=vs,
        github_token=config.github_token,
        session_id="test-session"
    )
    await agent.create_ai_me_agent()

    response = await agent.run("Do you have python experience?")
    
    assert "yes" in response.lower(), (
        f"yes' in response but got: {response}"
    )


@pytest.mark.asyncio
async def test_rear_knowledge_contains_it245(ai_me_agent):
    """Tests REQ-001: Knowledge base retrieval of personal documentation."""
    response = await ai_me_agent.run("What is IT-245?")
    
    assert "IT-245" in response or "It-245" in response or "it-245" in response
    logger.info("βœ“ IT-245 found in response")


@pytest.mark.asyncio
async def test_github_commits_contains_shas(ai_me_agent):
    """Tests REQ-002: MCP GitHub integration - retrieve commit history."""
    response = await ai_me_agent.run("What are some recent commits I've made?")
    
    assert response, "Response is empty"
    assert len(response) > 10, "Response is too short"
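    # NOTE: these assertions are deliberately loose; the agent may paraphrase the
    # commit history rather than quote SHAs verbatim. A stricter (illustrative,
    # currently disabled) check could look for abbreviated hex SHAs:
    #
    #   assert re.search(r"\b[0-9a-f]{7,40}\b", response), "No commit SHA found"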
    logger.info("βœ“ Response contains commit information")


@pytest.mark.asyncio
async def test_unknown_person_contains_negative_response(ai_me_agent):
    """Tests REQ-003: Graceful handling of out-of-scope requests."""
    response = await ai_me_agent.run(
        "Do you know Slartibartfast?"  # Presumed unknown person
    )
    
    assert response, "Response is empty"
    assert (
        "don't know" in response.lower() 
        or "not familiar" in response.lower() 
        or "no information" in response.lower()
        or "don't have any information" in response.lower()
    ), f"Response doesn't indicate lack of knowledge: {response}"
    logger.info(f"βœ“ Test passed - correctly handled out-of-scope query")


@pytest.mark.asyncio
async def test_carol_knowledge_contains_product(ai_me_agent):
    """Tests FR-002, FR-003: Verify asking about Carol returns 'product'."""
    response_raw = await ai_me_agent.run("Do you know Carol?")
    response = response_raw.lower()  # Convert to lowercase for matching
    
    # Assert that 'product' appears in the response (Carol is Product Owner)
    assert "product" in response, (
        f"Expected 'product' in response but got: {response}"
    )
    logger.info("βœ“ Test passed: Response contains 'product'")


@pytest.mark.asyncio
async def test_mcp_time_server_returns_current_date(ai_me_agent):
    """Tests FR-009, NFR-001: Verify that the MCP time server returns the current date."""
    response = await ai_me_agent.run("What is today's date?")

    # Check for current date in various formats (ISO or natural language)
    now = datetime.now()
    expected_date, current_year, current_month, current_day = (
        now.strftime("%Y-%m-%d"),
        str(now.year),
        now.strftime("%B"),
        str(now.day),
    )

    # Accept either ISO format or natural language date
    has_date = (
        expected_date in response
        or (
            current_year in response
            and current_month in response
            and current_day in response
        )
    )

    assert has_date, (
        f"Expected response to contain current date "
        f"({expected_date} or {current_month} {current_day}, {current_year}) "
        f"but got: {response}"
    )
    logger.info(f"βœ“ Test passed: Response contains current date")


@pytest.mark.asyncio
async def test_mcp_memory_server_remembers_favorite_color(ai_me_agent):
    """Tests FR-013, NFR-002: 
        Verify that the MCP memory server persists information across interactions.
    """
    await ai_me_agent.run("My favorite color is chartreuse.")
    response2 = await ai_me_agent.run("What's my favorite color?")
    
    # Check that the agent remembers the color
    assert "chartreuse" in response2.lower(), (
        f"Expected agent to remember favorite color 'chartreuse' "
        f"but got: {response2}"
    )
    msg = (
        "βœ“ Test passed: Agent remembered favorite color 'chartreuse' "
        "across interactions"
    )
    logger.info(msg)


@pytest.mark.asyncio
async def test_github_relative_links_converted_to_absolute_urls():
    """Tests FR-004: Document processing converts relative GitHub links to absolute URLs.
    
    Validates that when documents are loaded from GitHub with relative links 
    (e.g., /resume.md), they are rewritten to full GitHub URLs 
    (e.g., https://github.com/owner/repo/blob/main/resume.md).
    
    This is a unit-level test of the DataManager.process_documents() method.
    """
    from langchain_core.documents import Document
    
    sample_doc = Document(
        page_content=(
            "Check out [my resume](/resume.md) and "
            "[projects](/projects.md) for more info."
        ),
        metadata={
            "source": "github://byoung/ai-me/docs/about.md",
            "github_repo": "byoung/ai-me"
        }
    )
    
    # Verify metadata is set correctly before processing
    assert sample_doc.metadata["github_repo"] == "byoung/ai-me", (
        "Sample doc metadata should have github_repo"
    )
    
    data_config = DataManagerConfig()
    data_manager = DataManager(config=data_config)
    processed_docs = data_manager.process_documents([sample_doc])
    
    assert len(processed_docs) == 1, "Expected 1 processed document"
    processed_content = processed_docs[0].page_content
    
    # Check that relative links have been converted to absolute GitHub URLs
    assert "https://github.com/byoung/ai-me/blob/main/resume.md" in processed_content, (
        f"Expected absolute GitHub URL for /resume.md in processed content, "
        f"but got: {processed_content}"
    )
    assert "https://github.com/byoung/ai-me/blob/main/projects.md" in processed_content, (
        f"Expected absolute GitHub URL for /projects.md in processed content, "
        f"but got: {processed_content}"
    )
    
    logger.info("βœ“ Test passed: Relative GitHub links converted to absolute URLs")
    logger.info(f"  Original: [my resume](/resume.md)")
    logger.info(f"  Converted: [my resume](https://github.com/byoung/ai-me/blob/main/resume.md)")


@pytest.mark.asyncio
async def test_agent_responses_cite_sources(ai_me_agent):
    """Tests FR-004, FR-011: Agent responses include source citations.
    
    Validates that agent responses include proper source attribution,
    which could be GitHub URLs, local paths, or explicit source references.
    """
    questions = [
        "What do you know about ReaR?",
        "Tell me about your experience in technology",
    ]
    
    for question in questions:
        logger.info(f"\n{'='*60}\nSource citation test: {question}\n{'='*60}")
        
        response = await ai_me_agent.run(question)
        
        # Check that response includes some form of source attribution
        # Could be: GitHub URL, local path, "Sources" section, etc.
        has_source = (
            "https://github.com/" in response or
            ".md" in response or  # Local markdown file reference
            "source" in response.lower() or
            "documentation" in response.lower()
        )
        assert has_source, (
            f"Expected source attribution in response to '{question}' "
            f"but found none. Response: {response}"
        )
        
        # Verify response is substantive (not just metadata)
        min_length = 50
        assert len(response) > min_length, (
            f"Response to '{question}' was too short: {response}"
        )
        
        logger.info(f"βœ“ Source citation found for: {question[:40]}...")
    
    logger.info("\nβœ“ Test passed: Agent responses cite sources (FR-004, FR-011)")


@pytest.mark.asyncio
async def test_user_story_2_multi_topic_consistency(ai_me_agent):
    """
    Tests FR-001, FR-003, FR-005, NFR-002: User Story 2 - Multi-Topic Consistency
    
    Verify that the agent maintains consistent first-person perspective 
    across multiple conversation topics.
    
    This tests that the agent:
    - Uses first-person perspective (I, my, me) consistently
    - Gives a substantive response for each topic switch
    - Remains in-character as the personified individual
    """
    # Ask questions about two different topics (keyword hints are informational only)
    topics = [
        ("What is your background in technology?", "background|experience|technology"),
        ("What programming languages are you skilled in?", "programming|language|skilled"),
    ]
    
    # Contractions are matched as literal tokens; searches below use re.IGNORECASE
    first_person_patterns = [
        r"\bi\b", r"\bme\b", r"\bmy\b", r"\bmyself\b",
        r"\bi'm\b", r"\bi've\b", r"\bi'll\b",
    ]
    
    for question, _topic_keywords in topics:
        logger.info(f"\n{'='*60}\nMulti-topic test question: {question}\n{'='*60}")
        
        response = await ai_me_agent.run(question)
        
        # Check for first-person usage
        first_person_found = any(
            re.search(pattern, response, re.IGNORECASE) 
            for pattern in first_person_patterns
        )
        assert first_person_found, (
            f"Expected first-person perspective in response to '{question}' "
            f"but got: {response}"
        )
        
        # Verify response is substantive (not just "I don't know")
        min_length = 50  # Substantive responses should be > 50 chars
        assert len(response) > min_length, (
            f"Response to '{question}' was too short (likely not substantive): {response}"
        )
        
        logger.info(f"βœ“ First-person perspective maintained for: {question[:40]}...")
        logger.info(f"  Response preview: {response[:100]}...")
    
    logger.info("\nβœ“ Test passed: Consistent first-person perspective across 3+ topics")


@pytest.mark.asyncio
async def test_tool_failure_error_messages_are_friendly(caplog, ai_me_agent):
    """
    Tests FR-012, NFR-003: Error Message Quality

    Verify that tool failures return user-friendly messages without Python tracebacks.

    This tests that the agent:
    - Returns human-readable error messages
    - Logs an error that can be reviewed in our dashboard/logs

    Uses mocking to simulate tool failures without adding test-specific code to agent.py.
    """
    logger.info(f"\n{'='*60}\nError Handling Test\n{'='*60}")
    
    # Mock the Runner.run method to simulate a tool failure
    # This tests the catch-all exception handler without adding test code to production
    test_scenarios = [
        RuntimeError("Simulated tool timeout"),
        ValueError("Invalid tool parameters"),
    ]
    
    for error in test_scenarios:
        logger.info(f"\nTesting error scenario: {error.__class__.__name__}: {error}")
        
        # Clear previous log records for this iteration
        caplog.clear()
        
        # Mock Runner.run to raise an exception
        with patch('agent.Runner.run', new_callable=AsyncMock) as mock_run:
            mock_run.side_effect = error
            
            response = await ai_me_agent.run("Any user question")
            
            logger.info(f"Response: {response[:100]}...")
            
            # PRIMARY CHECK: Verify "I encountered an unexpected error" is in response
            assert "I encountered an unexpected error" in response, (
                f"Response must contain 'I encountered an unexpected error'. Got: {response}"
            )
            
            # SECONDARY CHECK: Verify error was logged by agent.py
            error_logs = [record for record in caplog.records if record.levelname == "ERROR"]
            assert len(error_logs) > 0, "Expected at least one ERROR log record from agent.py"
            
            # Find the agent.py error log (contains "Unexpected error:")
            agent_error_logged = any(
                "Unexpected error:" in record.message for record in error_logs
            )
            assert agent_error_logged, (
                f"Expected ERROR log with 'Unexpected error:' from agent.py. "
                f"Got: {[r.message for r in error_logs]}"
            )
            error_messages = [
                r.message for r in error_logs
                if "Unexpected error:" in r.message
            ]
            logger.info(
                f"βœ“ Error properly logged to logger: {error_messages}"
            )
    
    logger.info("\nβœ“ Test passed: Error messages are friendly (FR-012) + properly logged")


@pytest.mark.asyncio
async def test_logger_setup_format(caplog):
    """Tests NFR-003 (Structured Logging): Verify setup_logger creates structured logging.
    
    Tests that setup_logger() configures syslog-style format with JSON support for
    structured logging of user/agent interactions.
    
    This validates the logger configuration that our production app relies on
    for analytics and debugging.
    """
    # Force logger setup to run by clearing handlers so setup_logger reconfigures
    root_logger = logging.getLogger()
    original_handlers = root_logger.handlers[:]
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)
    
    try:
        # Now call setup_logger with no handlers - should trigger full setup
        test_logger = setup_logger("test.structured_logging")
        
        # Verify logger was created
        assert test_logger.name == "test.structured_logging"
        
        # Verify root logger now has handlers (setup_logger should have added them)
        assert len(root_logger.handlers) > 0, (
            "Root logger should have handlers after setup_logger"
        )
        
        # Verify we have a StreamHandler (console output)
        has_stream_handler = any(
            isinstance(handler, logging.StreamHandler)
            for handler in root_logger.handlers
        )
        assert has_stream_handler, "Should have StreamHandler for console output"
        
        # Test that logging works with structured JSON format
        # The formatters should support JSON logging for analytics
        test_logger.info(
            '{"session_id": "test-session", "user_input": "test message"}'
        )
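        # With a syslog-style formatter, the line above might render as something
        # like this (illustrative only; the actual format is defined in
        # config.setup_logger):
        #
        #   Jan  1 00:00:00 host ai-me: {"session_id": "test-session", "user_input": "test message"}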
        
        logger.info(
            "βœ“ Test passed: Logger setup configures structured logging (NFR-003)"
        )
    finally:
        # Restore original handlers
        for handler in root_logger.handlers[:]:
            root_logger.removeHandler(handler)
        for handler in original_handlers:
            root_logger.addHandler(handler)


if __name__ == "__main__":
    # Allow running tests directly with python test.py
    pytest.main([__file__, "-v", "-s"])