Spiritual_Health_Project / tests /unit /test_pattern_recognizer.py
DocUA's picture
feat: Complete prompt optimization system implementation
24214fc
#!/usr/bin/env python3
"""
Test script for the pattern recognizer and error pattern analysis.
Tests Task 4.4 implementation.
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
from config.prompt_management.pattern_recognizer import PatternRecognizer
from config.prompt_management.feedback_system import FeedbackSystem
from config.prompt_management.data_models import (
ErrorType, ErrorSubcategory, QuestionIssueType, ReferralProblemType, ScenarioType
)
def test_pattern_recognizer_initialization():
    """Check PatternRecognizer's default and custom constructor settings.

    Returns:
        True when every assertion passes (``main`` counts a truthy return as
        a pass); a failed assertion raises ``AssertionError`` instead.
    """
    print("Testing pattern recognizer initialization...")

    # No-argument construction: this test expects a frequency floor of 3 and
    # a confidence threshold of 0.7, plus the two lookup attributes.
    recognizer = PatternRecognizer()
    assert recognizer.min_pattern_frequency == 3
    assert recognizer.confidence_threshold == 0.7
    assert hasattr(recognizer, 'analysis_strategies')
    assert hasattr(recognizer, 'suggestion_templates')

    # Keyword overrides must be stored unchanged on the instance.
    custom_recognizer = PatternRecognizer(min_pattern_frequency=5, confidence_threshold=0.8)
    assert custom_recognizer.min_pattern_frequency == 5
    assert custom_recognizer.confidence_threshold == 0.8

    # The success marker was mojibake (a UTF-8 check mark decoded with the
    # wrong code page); restored to the intended character.
    print("✓ Pattern recognizer initializes correctly")
    return True
def test_classification_error_pattern_analysis():
    """Exercise ``_analyze_classification_error_patterns`` on synthetic errors.

    Feeds four ``wrong_classification`` records followed by three
    ``severity_misjudgment`` records to a recognizer built with
    ``min_pattern_frequency=2`` and asserts that a pattern with the matching
    frequency is reported for each error type.

    Returns:
        True when every assertion passes; a failed assertion raises
        ``AssertionError`` instead.
    """
    print("Testing classification error pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Four wrong-classification records (expected YELLOW, actual GREEN).
    wrong_classification_errors = [
        {
            'error_id': f'error_{i}',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': f'I feel stressed about work {i}',
            'reviewer_comments': f'Test comment {i}',
            'confidence_level': 0.8 + (i * 0.05),
            'timestamp': '2024-12-18T10:00:00',
            'session_id': f'session_{i}',
            'additional_context': {'scenario_type': 'vague_stress'},
        }
        for i in range(4)
    ]

    # Three severity-misjudgment records (expected RED, actual YELLOW).
    severity_errors = [
        {
            'error_id': f'severity_{i}',
            'error_type': 'severity_misjudgment',
            'subcategory': 'underestimated_distress',
            'expected_category': 'RED',
            'actual_category': 'YELLOW',
            'message_content': f'I cannot go on like this {i}',
            'reviewer_comments': f'Severe distress comment {i}',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T11:00:00',
            'session_id': f'severity_session_{i}',
            'additional_context': {},
        }
        for i in range(3)
    ]

    # Same ordering as before: wrong-classification records come first.
    test_errors = wrong_classification_errors + severity_errors

    patterns = recognizer._analyze_classification_error_patterns(test_errors)
    assert len(patterns) > 0, "Should identify patterns in test data"

    # Patterns are matched by substring on ``pattern_type``; the first match
    # for each error type must carry the number of records supplied above.
    wrong_classification_patterns = [p for p in patterns if 'wrong_classification' in p.pattern_type]
    assert len(wrong_classification_patterns) > 0, "Should identify wrong classification pattern"
    wrong_pattern = wrong_classification_patterns[0]
    assert wrong_pattern.frequency == 4, "Wrong classification pattern should have frequency 4"
    assert len(wrong_pattern.suggested_improvements) > 0, "Should have improvement suggestions"

    severity_patterns = [p for p in patterns if 'severity_misjudgment' in p.pattern_type]
    assert len(severity_patterns) > 0, "Should identify severity misjudgment pattern"
    severity_pattern = severity_patterns[0]
    assert severity_pattern.frequency == 3, "Severity pattern should have frequency 3"

    # The success marker was mojibake; restored to the intended check mark.
    print(f"✓ Identified {len(patterns)} classification error patterns")
    for pattern in patterns[:3]:  # Show first 3 patterns
        print(f" - {pattern.description} (confidence: {pattern.confidence_score:.2f})")
    return True
def test_question_issue_pattern_analysis():
    """Exercise ``_analyze_question_issue_patterns`` on synthetic issues.

    Feeds three ``inappropriate_question`` records followed by two
    ``wrong_scenario_targeting`` records to a recognizer built with
    ``min_pattern_frequency=2`` and asserts that an inappropriate-question
    pattern with frequency 3 is reported.

    Returns:
        True when every assertion passes; a failed assertion raises
        ``AssertionError`` instead.
    """
    print("Testing question issue pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Three inappropriate-question records, each with a suggested rewrite.
    inappropriate_questions = [
        {
            'issue_id': f'question_{i}',
            'issue_type': 'inappropriate_question',
            'question_content': f'Why are you sad? {i}',
            'scenario_type': 'loss_of_interest',
            'reviewer_comments': f'Too direct question {i}',
            'severity': 'medium',
            'timestamp': '2024-12-18T12:00:00',
            'session_id': f'question_session_{i}',
            'suggested_improvement': f'Better question {i}',
        }
        for i in range(3)
    ]

    # Two wrong-scenario-targeting records; these carry no suggested
    # improvement (None), so that case reaches the analyzer as well.
    targeting_questions = [
        {
            'issue_id': f'targeting_{i}',
            'issue_type': 'wrong_scenario_targeting',
            'question_content': f'How does that make you feel? {i}',
            'scenario_type': 'vague_stress',
            'reviewer_comments': f'Wrong targeting comment {i}',
            'severity': 'high',
            'timestamp': '2024-12-18T13:00:00',
            'session_id': f'targeting_session_{i}',
            'suggested_improvement': None,
        }
        for i in range(2)
    ]

    # Same ordering as before: inappropriate-question records come first.
    test_questions = inappropriate_questions + targeting_questions

    patterns = recognizer._analyze_question_issue_patterns(test_questions)
    assert len(patterns) > 0, "Should identify question issue patterns"

    # Patterns are matched by substring on ``pattern_type``.
    inappropriate_patterns = [p for p in patterns if 'inappropriate_question' in p.pattern_type]
    assert len(inappropriate_patterns) > 0, "Should identify inappropriate question pattern"
    inappropriate_pattern = inappropriate_patterns[0]
    assert inappropriate_pattern.frequency == 3, "Inappropriate question pattern should have frequency 3"

    # The success marker was mojibake; restored to the intended check mark.
    print(f"✓ Identified {len(patterns)} question issue patterns")
    for pattern in patterns:
        print(f" - {pattern.description} (confidence: {pattern.confidence_score:.2f})")
    return True
def test_comprehensive_pattern_analysis():
    """Exercise ``analyze_comprehensive_patterns`` across all feedback types.

    Passes two classification errors, one question issue and one referral
    problem to a recognizer built with ``min_pattern_frequency=2`` and
    asserts that at least one pattern comes back. The question issue shares
    ``session_id`` 'comp_session_1' with the first classification error.

    Returns:
        True when every assertion passes; a failed assertion raises
        ``AssertionError`` instead.
    """
    print("Testing comprehensive pattern analysis...")
    recognizer = PatternRecognizer(min_pattern_frequency=2)

    # Two wrong-classification records with identical type and subcategory.
    test_errors = [
        {
            'error_id': 'comp_error_1',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'I feel overwhelmed',
            'reviewer_comments': 'Clear distress missed',
            'confidence_level': 0.9,
            'timestamp': '2024-12-18T14:00:00',
            'session_id': 'comp_session_1',
            'additional_context': {},
        },
        {
            'error_id': 'comp_error_2',
            'error_type': 'wrong_classification',
            'subcategory': 'green_to_yellow',
            'expected_category': 'YELLOW',
            'actual_category': 'GREEN',
            'message_content': 'Everything is falling apart',
            'reviewer_comments': 'Obvious distress indicators',
            'confidence_level': 0.95,
            'timestamp': '2024-12-18T14:30:00',
            'session_id': 'comp_session_2',
            'additional_context': {},
        },
    ]
    test_questions = [
        {
            'issue_id': 'comp_question_1',
            'issue_type': 'insensitive_language',
            'question_content': 'What is wrong with you?',
            'scenario_type': 'vague_stress',
            'reviewer_comments': 'Harsh language',
            'severity': 'high',
            'timestamp': '2024-12-18T15:00:00',
            'session_id': 'comp_session_1',  # Same session as error
            'suggested_improvement': 'Use gentler language',
        },
    ]
    test_referrals = [
        {
            'problem_id': 'comp_referral_1',
            'problem_type': 'incomplete_summary',
            'referral_content': 'Patient needs help.',
            'reviewer_comments': 'Missing details',
            'severity': 'medium',
            'timestamp': '2024-12-18T16:00:00',
            'session_id': 'comp_session_3',
            'missing_fields': ['distress_indicators', 'urgency_level'],
        },
    ]

    patterns = recognizer.analyze_comprehensive_patterns(test_errors, test_questions, test_referrals)
    assert len(patterns) > 0, "Should identify comprehensive patterns"

    # Cross-feedback patterns (same session with error and question) are
    # matched by 'correlation' in ``pattern_type``. They may not always be
    # found with such small test data, so the count is reported rather than
    # asserted. Previously this list was built and then never used.
    cross_patterns = [p for p in patterns if 'correlation' in p.pattern_type]

    # The success marker was mojibake; restored to the intended check mark.
    print(f"✓ Identified {len(patterns)} comprehensive patterns")
    print(f" - Cross-feedback correlation patterns: {len(cross_patterns)}")
    for pattern in patterns[:5]:  # Show first 5 patterns
        print(f" - {pattern.description}")
        if pattern.suggested_improvements:
            print(f" Suggestion: {pattern.suggested_improvements[0]}")
    return True
def test_optimization_report_generation():
    """Check the structure and content of ``generate_optimization_report``.

    Builds two hand-written ``ErrorPattern`` objects (frequencies 5 and 3),
    asks a recognizer for a report, and asserts that the report contains all
    required keys, counts both patterns, offers at least one recommendation,
    has a confidence score within [0.0, 1.0], and names the frequency-5
    pattern as the most frequent.

    Returns:
        True when every assertion passes; a failed assertion raises
        ``AssertionError`` instead.
    """
    print("Testing optimization report generation...")
    recognizer = PatternRecognizer(min_pattern_frequency=1)

    # ErrorPattern is only needed here, so it is imported locally.
    from config.prompt_management.data_models import ErrorPattern

    test_patterns = [
        ErrorPattern(
            pattern_id="test_pattern_1",
            pattern_type="error_type_wrong_classification",
            description="Frequent wrong classification errors (5 occurrences)",
            frequency=5,
            affected_scenarios=[ScenarioType.VAGUE_STRESS],
            suggested_improvements=[
                "Review classification criteria",
                "Add more training examples",
                "Improve decision boundaries",
            ],
            confidence_score=0.8,
        ),
        ErrorPattern(
            pattern_id="test_pattern_2",
            pattern_type="question_issue_inappropriate_question",
            description="Frequent inappropriate question issues (3 occurrences)",
            frequency=3,
            affected_scenarios=[ScenarioType.LOSS_OF_INTEREST],
            suggested_improvements=[
                "Review question appropriateness",
                "Add sensitivity training",
            ],
            confidence_score=0.6,
        ),
    ]

    report = recognizer.generate_optimization_report(test_patterns)

    # Every one of these keys must be present in the returned report.
    required_fields = [
        'summary', 'total_patterns', 'recommendations', 'priority_actions',
        'confidence_score', 'most_frequent_pattern', 'affected_scenarios',
        'report_generated',
    ]
    for field in required_fields:
        assert field in report, f"Report missing required field: {field}"

    assert report['total_patterns'] == 2, "Should report correct number of patterns"
    assert len(report['recommendations']) > 0, "Should have recommendations"
    assert 0.0 <= report['confidence_score'] <= 1.0, "Confidence score should be valid"
    assert report['most_frequent_pattern']['frequency'] == 5, "Should identify most frequent pattern"

    # The success marker was mojibake; restored to the intended check mark.
    print("✓ Optimization report generated successfully")
    print(f" - Total patterns: {report['total_patterns']}")
    print(f" - Confidence score: {report['confidence_score']:.2f}")
    print(f" - Top recommendation: {report['recommendations'][0] if report['recommendations'] else 'None'}")
    return True
def test_feedback_system_integration():
    """Check pattern analysis and reporting through ``FeedbackSystem``.

    Records four classification errors and three question issues, then
    asserts that ``analyze_error_patterns(min_frequency=2)`` returns at least
    one pattern and that ``generate_optimization_report()`` lists patterns
    and recommendations.

    The system is given a storage path inside a fresh temporary directory
    instead of the fixed relative path used before, so a run neither depends
    on the current working directory nor (presumably, assuming the system
    persists feedback under ``storage_path`` -- confirm against
    ``FeedbackSystem``) picks up records left by an earlier run.

    Returns:
        True when every assertion passes; a failed assertion raises
        ``AssertionError`` instead.
    """
    import shutil
    import tempfile

    print("Testing feedback system integration...")

    scratch_root = tempfile.mkdtemp(prefix="pattern_integration_")
    try:
        # The sub-directory does not exist yet, matching what the old
        # relative path looked like on a first run.
        storage_path = os.path.join(scratch_root, "test_pattern_integration")
        feedback_system = FeedbackSystem(storage_path=storage_path)

        # Four similar classification errors so that a pattern can form.
        for i in range(4):
            feedback_system.record_classification_error(
                error_type=ErrorType.WRONG_CLASSIFICATION,
                subcategory=ErrorSubcategory.GREEN_TO_YELLOW,
                expected_category="YELLOW",
                actual_category="GREEN",
                message_content=f"I feel stressed and overwhelmed {i}",
                reviewer_comments=f"Clear distress indicators missed {i}",
                confidence_level=0.85 + (i * 0.02),
                session_id=f"integration_session_{i}",
                additional_context={"scenario_type": "vague_stress"},
            )

        # Three question issues reusing the first three session ids.
        for i in range(3):
            feedback_system.record_question_issue(
                issue_type=QuestionIssueType.INAPPROPRIATE_QUESTION,
                question_content=f"What's wrong with you? {i}",
                scenario_type=ScenarioType.VAGUE_STRESS,
                reviewer_comments=f"Too harsh language {i}",
                severity="high",
                session_id=f"integration_session_{i}",
            )

        patterns = feedback_system.analyze_error_patterns(min_frequency=2)
        assert len(patterns) > 0, "Feedback system should identify patterns"

        report = feedback_system.generate_optimization_report()
        assert report['total_patterns'] > 0, "Should have patterns in report"
        assert len(report['recommendations']) > 0, "Should have recommendations"

        # The success marker was mojibake; restored to the intended check
        # mark. The string has no placeholders, so it is a plain literal.
        print("✓ Feedback system integration works")
        print(f" - Patterns identified: {len(patterns)}")
        print(f" - Report confidence: {report['confidence_score']:.2f}")
        return True
    finally:
        # Best-effort cleanup: a leftover scratch directory must not turn a
        # passing test into a failure.
        shutil.rmtree(scratch_root, ignore_errors=True)
def main():
    """Run every pattern recognizer test and print a pass/fail summary.

    Each test function is called in turn. A truthy return counts as a pass;
    a falsy return or any raised ``Exception`` (including
    ``AssertionError``) counts as a failure, and the remaining tests still
    run.

    Returns:
        True if no test failed, False otherwise.
    """
    print("=" * 60)
    print("PATTERN RECOGNIZER TESTS")
    print("=" * 60)

    tests = [
        test_pattern_recognizer_initialization,
        test_classification_error_pattern_analysis,
        test_question_issue_pattern_analysis,
        test_comprehensive_pattern_analysis,
        test_optimization_report_generation,
        test_feedback_system_integration,
    ]

    passed = 0
    failed = 0
    for test in tests:
        # Heading derived from the function name, e.g. "Test Foo Bar:".
        print(f"\n{test.__name__.replace('_', ' ').title()}:")
        print("-" * 40)
        try:
            result = test()
        except Exception as e:
            # Top-level boundary: report the failure and keep going. The
            # exception type is included because a bare ``assert`` has an
            # empty message and would otherwise print nothing useful.
            failed += 1
            print(f"✗ FAILED: {type(e).__name__}: {e}")
        else:
            if result:
                passed += 1
                print("✓ PASSED")
            else:
                failed += 1
                print("✗ FAILED")

    print("\n" + "=" * 60)
    print(f"RESULTS: {passed} passed, {failed} failed")
    print("=" * 60)

    if failed:
        print("❌ Some tests failed. Please check the implementation.")
        return False

    # The status glyphs in this function were mojibake; restored to the
    # intended characters.
    print("🎉 All pattern recognizer tests passed!")
    print("\n**Task 4.4: Error Pattern Analysis**")
    print("✓ COMPLETED: PatternRecognizer for identifying common error types")
    print("✓ COMPLETED: Automated improvement suggestion generation")
    print("✓ COMPLETED: Feedback aggregation and reporting")
    print("✓ COMPLETED: Integration with FeedbackSystem")
    return True
# Script entry point: exit status 0 when every test passed, 1 otherwise.
if __name__ == "__main__":
    sys.exit(0 if main() else 1)