# Testing Framework - BDR Agent Factory
## Overview

This document defines a comprehensive testing strategy for AI capabilities, ensuring quality, compliance, and reliability across all insurance business systems.
## Testing Pyramid

```
            ┌───────────────────┐
            │   E2E Tests (5%)  │
            └───────────────────┘
         ┌─────────────────────────┐
         │ Integration Tests (15%) │
         └─────────────────────────┘
      ┌───────────────────────────────┐
      │     Component Tests (30%)     │
      └───────────────────────────────┘
   ┌─────────────────────────────────────┐
   │           Unit Tests (50%)          │
   └─────────────────────────────────────┘
```
## 1. Unit Tests

### Purpose

Test individual capability functions in isolation.

### Coverage Requirements

- Minimum: 80% code coverage
- Target: 90% code coverage
- Critical paths: 100% coverage

### Example: Text Classification Unit Test
```python
import pytest

from bdr_agent_factory.capabilities import TextClassification


class TestTextClassification:
    @pytest.fixture
    def classifier(self):
        return TextClassification(
            model_version="1.0.0",
            classes=["property_damage", "auto_accident", "health_claim"]
        )

    def test_basic_classification(self, classifier):
        """Test basic text classification"""
        result = classifier.classify(
            text="Water damage in basement after storm"
        )
        assert result.predicted_class == "property_damage"
        assert result.confidence > 0.7
        assert len(result.all_scores) == 3

    def test_empty_input(self, classifier):
        """Test handling of empty input"""
        with pytest.raises(ValueError, match="Input text cannot be empty"):
            classifier.classify(text="")

    def test_long_input(self, classifier):
        """Test handling of excessively long input"""
        long_text = "word " * 10000
        with pytest.raises(ValueError, match="Input exceeds maximum length"):
            classifier.classify(text=long_text)

    def test_confidence_threshold(self, classifier):
        """Test confidence threshold filtering"""
        result = classifier.classify(
            text="Ambiguous claim description",
            confidence_threshold=0.95
        )
        if result.confidence < 0.95:
            assert result.predicted_class is None

    def test_explainability(self, classifier):
        """Test explanation generation"""
        result = classifier.classify(
            text="Water damage in basement",
            explain=True
        )
        assert result.explanation is not None
        assert "key_features" in result.explanation
        assert len(result.explanation["key_features"]) > 0
```
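These tests assume the `classify` interface shown in the fixture. When the real model is too heavy for unit runs, a lightweight test double with the same surface keeps the suite fast and deterministic. The sketch below is illustrative only: the `FakeTextClassification` class, the `MAX_INPUT_LENGTH` constant, and the toy keyword scoring are assumptions, not the production implementation.

```python
# Hypothetical stand-in for TextClassification, matching only the
# interface the unit tests above exercise. Not the production class.
from dataclasses import dataclass
from typing import Optional

MAX_INPUT_LENGTH = 10_000  # assumed limit; align with the real capability


@dataclass
class ClassificationResult:
    predicted_class: Optional[str]
    confidence: float
    all_scores: dict
    explanation: Optional[dict] = None


class FakeTextClassification:
    def __init__(self, classes, model_version="0.0.0"):
        self.classes = classes
        self.model_version = model_version

    def classify(self, text, confidence_threshold=0.0, explain=False):
        if not text:
            raise ValueError("Input text cannot be empty")
        if len(text) > MAX_INPUT_LENGTH:
            raise ValueError("Input exceeds maximum length")
        # Toy scoring: a keyword hit wins, everything else stays low.
        scores = {c: 0.1 for c in self.classes}
        if "damage" in text.lower() and "property_damage" in self.classes:
            top = "property_damage"
        else:
            top = self.classes[0]
        scores[top] = 0.8
        predicted = top if scores[top] >= confidence_threshold else None
        explanation = {"key_features": ["damage"]} if explain else None
        return ClassificationResult(predicted, scores[top], scores, explanation)
```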
## 2. Component Tests

### Purpose

Test capability components together with their dependencies (models, databases, etc.).

### Example: Fraud Detection Component Test
```python
import pytest

from bdr_agent_factory.capabilities import FraudDetection
from bdr_agent_factory.models import ModelRegistry


class TestFraudDetectionComponent:
    @pytest.fixture
    def fraud_detector(self):
        model = ModelRegistry.load("fraud_detection_v1")
        return FraudDetection(model=model)

    def test_fraud_detection_with_model(self, fraud_detector):
        """Test fraud detection with an actual model"""
        claim_data = {
            "claim_amount": 50000,
            "claim_type": "auto_accident",
            "claimant_history": {"previous_claims": 5},
            "incident_details": "Rear-end collision on highway"
        }
        result = fraud_detector.detect(claim_data)
        assert 0.0 <= result.fraud_score <= 1.0
        assert result.risk_level in ["low", "medium", "high"]
        assert result.explanation is not None

    def test_audit_trail_creation(self, fraud_detector):
        """Test that an audit trail is created"""
        claim_data = {"claim_amount": 10000}
        result = fraud_detector.detect(
            claim_data,
            audit=True,
            request_id="test_req_123"
        )
        assert result.audit_id is not None
        # Verify the audit record was persisted
        audit_record = fraud_detector.get_audit_record(result.audit_id)
        assert audit_record.request_id == "test_req_123"
```
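Component tests load a real model artifact. In environments where `fraud_detection_v1` is not available, the registry can be stubbed with pytest's `monkeypatch`. The sketch below shows one way to do that; the `stub_fraud_detector` fixture name and the stub model's `predict` output shape are assumptions.

```python
# Sketch: patching ModelRegistry so component tests run without the
# real "fraud_detection_v1" artifact. Output shape is an assumption.
from unittest.mock import MagicMock

import pytest

from bdr_agent_factory.capabilities import FraudDetection
from bdr_agent_factory.models import ModelRegistry


@pytest.fixture
def stub_fraud_detector(monkeypatch):
    """FraudDetection wired to a stub model instead of a registry load."""
    stub_model = MagicMock()
    stub_model.predict.return_value = {"fraud_score": 0.12}
    monkeypatch.setattr(ModelRegistry, "load", staticmethod(lambda name: stub_model))
    return FraudDetection(model=ModelRegistry.load("fraud_detection_v1"))
```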
## 3. Integration Tests

### Purpose

Test end-to-end capability invocation through the API.

### Example: API Integration Test
```python
import pytest

from bdr_agent_factory.test_utils import TestClient


class TestCapabilityAPI:
    @pytest.fixture
    def client(self):
        return TestClient(
            base_url="http://localhost:8000",
            api_key="test_api_key"
        )

    def test_capability_invocation(self, client):
        """Test capability invocation via the API"""
        response = client.post(
            "/v1/capabilities/cap_text_classification/invoke",
            json={
                "input": {
                    "text": "Customer reported water damage"
                },
                "options": {
                    "explain": True,
                    "audit_trail": True
                }
            }
        )
        assert response.status_code == 200
        data = response.json()
        assert "result" in data
        assert "predicted_class" in data["result"]
        assert "explanation" in data["result"]
        assert "audit_trail" in data

    def test_batch_processing(self, client):
        """Test batch capability invocation"""
        response = client.post(
            "/v1/capabilities/cap_text_classification/batch",
            json={
                "inputs": [
                    {"text": "Claim 1"},
                    {"text": "Claim 2"},
                    {"text": "Claim 3"}
                ]
            }
        )
        assert response.status_code == 202  # Accepted
        data = response.json()
        assert "batch_id" in data
        assert data["status"] == "processing"

        # Poll for completion
        batch_id = data["batch_id"]
        status = client.get_batch_status(batch_id)
        assert status in ["processing", "completed"]

    def test_authentication_required(self):
        """Test that authentication is required"""
        client_no_auth = TestClient(base_url="http://localhost:8000")
        response = client_no_auth.post(
            "/v1/capabilities/cap_text_classification/invoke",
            json={"input": {"text": "Test"}}
        )
        assert response.status_code == 401
```
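`TestClient` is provided by `bdr_agent_factory.test_utils`. For reference, a minimal version could be built on `requests` roughly as follows; the batch-status endpoint path and the bearer-token header are assumptions mirroring the tests above, not the documented API surface.

```python
# Minimal, hypothetical TestClient built on requests. Method names
# mirror the tests above; endpoint paths and auth scheme are assumptions.
import requests


class TestClient:
    def __init__(self, base_url, api_key=None):
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        if api_key:
            self.session.headers["Authorization"] = f"Bearer {api_key}"

    def post(self, path, json=None):
        return self.session.post(f"{self.base_url}{path}", json=json)

    def get_batch_status(self, batch_id):
        # Assumed status endpoint; adjust to the real API.
        resp = self.session.get(f"{self.base_url}/v1/batches/{batch_id}")
        resp.raise_for_status()
        return resp.json()["status"]
```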
## 4. End-to-End Tests

### Purpose

Test complete business workflows across multiple systems.

### Example: Claims Processing E2E Test
```python
import pytest

from bdr_agent_factory.test_utils import E2ETestHarness


class TestClaimsProcessingWorkflow:
    @pytest.fixture
    def harness(self):
        return E2ETestHarness(
            systems=["ClaimsGPT", "FraudDetectionAgent"],
            environment="staging"
        )

    def test_complete_claims_workflow(self, harness):
        """Test the complete claims processing workflow"""
        # Step 1: Submit claim
        claim = harness.submit_claim({
            "claimant": "John Doe",
            "claim_type": "auto_accident",
            "description": "Rear-end collision on I-5",
            "amount": 5000
        })
        assert claim.id is not None

        # Step 2: Classify claim
        classification = harness.invoke_capability(
            "cap_text_classification",
            input={"text": claim.description}
        )
        assert classification.predicted_class == "auto_accident"

        # Step 3: Fraud detection
        fraud_check = harness.invoke_capability(
            "cap_fraud_detection",
            input={"claim_data": claim.to_dict()}
        )
        assert fraud_check.risk_level in ["low", "medium", "high"]

        # Step 4: Decision
        decision = harness.make_decision(
            claim_id=claim.id,
            classification=classification,
            fraud_check=fraud_check
        )
        assert decision.type in ["approve", "review", "reject"]

        # Step 5: Verify audit trail
        audit_trail = harness.get_audit_trail(claim.id)
        assert len(audit_trail) >= 3  # classification, fraud check, decision

        # Step 6: Verify compliance
        compliance_check = harness.verify_compliance(
            claim_id=claim.id,
            frameworks=["GDPR", "IFRS17"]
        )
        assert compliance_check.is_compliant is True
```
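Several E2E steps (batch completion, audit-trail propagation) are eventually consistent, so fixed `sleep` calls tend to produce flaky tests. A small polling helper along these lines is one alternative; `wait_for` is a hypothetical utility, not part of the harness.

```python
# Generic polling helper for eventually consistent E2E assertions,
# e.g. waiting for the audit trail to reach three entries.
import time


def wait_for(predicate, timeout=30.0, interval=0.5):
    """Poll `predicate` until it returns truthy or `timeout` seconds pass."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        result = predicate()
        if result:
            return result
        time.sleep(interval)
    raise TimeoutError(f"condition not met within {timeout}s")

# Example (hypothetical) usage inside the workflow test:
#   wait_for(lambda: len(harness.get_audit_trail(claim.id)) >= 3)
```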
## 5. Performance Tests

### Purpose

Ensure capabilities meet their performance SLAs.

### Load Testing
```python
from locust import HttpUser, task, between


class CapabilityLoadTest(HttpUser):
    wait_time = between(1, 3)

    def on_start(self):
        """Authenticate before testing"""
        response = self.client.post("/auth/token", json={
            "client_id": "test_client",
            "client_secret": "test_secret"
        })
        self.token = response.json()["access_token"]

    @task(3)
    def invoke_text_classification(self):
        """Text classification under load (3x weight)"""
        self.client.post(
            "/v1/capabilities/cap_text_classification/invoke",
            headers={"Authorization": f"Bearer {self.token}"},
            json={
                "input": {"text": "Sample claim description"}
            }
        )

    @task(1)
    def invoke_fraud_detection(self):
        """Fraud detection under load (1x weight)"""
        self.client.post(
            "/v1/capabilities/cap_fraud_detection/invoke",
            headers={"Authorization": f"Bearer {self.token}"},
            json={
                "input": {"claim_amount": 10000}
            }
        )

# Run with: locust -f performance_tests.py --users 100 --spawn-rate 10
```
### Performance Benchmarks
```python
import time

from bdr_agent_factory.capabilities import TextClassification


class TestPerformanceBenchmarks:
    def test_latency_p95(self):
        """Test that P95 latency is under 300 ms"""
        classifier = TextClassification()
        latencies = []
        for _ in range(100):
            start = time.perf_counter()  # monotonic, high-resolution timer
            classifier.classify(text="Sample text for classification")
            latencies.append((time.perf_counter() - start) * 1000)  # ms

        latencies.sort()
        p95_latency = latencies[94]  # 95th of 100 sorted samples
        assert p95_latency < 300, f"P95 latency {p95_latency:.1f}ms exceeds 300ms SLA"

    def test_throughput(self):
        """Test a minimum throughput of 100 requests/second"""
        classifier = TextClassification()
        start = time.perf_counter()
        for _ in range(100):
            classifier.classify(text="Sample text")
        duration = time.perf_counter() - start

        throughput = 100 / duration
        assert throughput >= 100, f"Throughput {throughput:.1f} RPS below 100 RPS SLA"
```
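Indexing `latencies[94]` is only correct for exactly 100 samples. For arbitrary sample sizes, the standard library can compute the percentile directly, as in this sketch:

```python
# Sample-size-independent P95 using the standard library. quantiles()
# sorts the data and interpolates, so no hard-coded index is needed.
import statistics


def p95(latencies_ms):
    # quantiles(n=100) returns the 99 percentile cut points; index 94 is P95.
    return statistics.quantiles(latencies_ms, n=100)[94]

# p95(latencies) can replace latencies[94] in the benchmark above.
```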
## 6. Compliance Tests

### Purpose

Verify adherence to regulatory requirements.

### GDPR Compliance Tests
```python
from bdr_agent_factory.audit import AuditService
from bdr_agent_factory.capabilities import TextClassification
from bdr_agent_factory.compliance import GDPRValidator


class TestGDPRCompliance:
    def test_data_minimization(self):
        """Test that only necessary data is collected"""
        validator = GDPRValidator()
        claim_data = {
            "claim_id": "123",
            "description": "Claim description",
            "amount": 5000
        }
        result = validator.validate_data_minimization(claim_data)
        assert result.is_compliant is True

    def test_right_to_explanation(self):
        """Test that explanations are available"""
        classifier = TextClassification()
        result = classifier.classify(
            text="Sample text",
            explain=True
        )
        assert result.explanation is not None
        assert "key_features" in result.explanation

    def test_data_retention(self):
        """Test that data retention policies are enforced"""
        audit_service = AuditService()
        # Create an audit record with a seven-year retention period
        audit_id = audit_service.create_audit(
            capability_id="cap_test",
            retention_days=2555  # 7 years
        )
        audit_record = audit_service.get_audit(audit_id)
        assert audit_record.retention_days == 2555
```
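The internals of `GDPRValidator` are not shown here; conceptually, data minimization can be reduced to an allowlist check. A toy version, with field names chosen purely for illustration:

```python
# Illustrative allowlist check behind a data-minimization rule.
# Field names are assumptions; the real GDPRValidator's rules may differ.
ALLOWED_CLAIM_FIELDS = {"claim_id", "description", "amount", "claim_type"}


def validate_data_minimization(claim_data: dict) -> bool:
    """Reject payloads carrying fields the capability does not need."""
    extra_fields = set(claim_data) - ALLOWED_CLAIM_FIELDS
    return not extra_fields


assert validate_data_minimization({"claim_id": "123", "amount": 5000})
assert not validate_data_minimization({"claim_id": "123", "ssn": "000-00-0000"})
```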
### IFRS 17 Compliance Tests
```python
from bdr_agent_factory.audit import AuditService


class TestIFRS17Compliance:
    def test_audit_trail_completeness(self):
        """Test the complete audit trail for insurance contracts"""
        audit_service = AuditService()
        # Simulate an underwriting decision
        audit_id = audit_service.create_audit(
            capability_id="cap_underwriting",
            input_data={"policy_data": "..."},
            output_data={"decision": "approve"},
            compliance_flags={"ifrs17_compliant": True}
        )
        audit_record = audit_service.get_audit(audit_id)
        assert audit_record.compliance_flags["ifrs17_compliant"] is True
        assert audit_record.input_hash is not None
        assert audit_record.output_hash is not None
```
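The test asserts that `input_hash` and `output_hash` exist but does not prescribe how they are computed. One plausible scheme (an assumption, not necessarily what `AuditService` does) is SHA-256 over a canonical JSON serialization, which makes hashes stable under key reordering:

```python
# One plausible way to derive the audit hashes asserted above:
# SHA-256 over a canonical (sorted-keys) JSON serialization.
# This is an assumption about AuditService, shown for illustration.
import hashlib
import json


def canonical_hash(payload: dict) -> str:
    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


h1 = canonical_hash({"decision": "approve", "score": 0.9})
h2 = canonical_hash({"score": 0.9, "decision": "approve"})
assert h1 == h2  # key order does not affect the hash
```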
## 7. Security Tests

### Purpose

Identify security vulnerabilities.

### Injection, XSS, and Rate-Limiting Tests
```python
from bdr_agent_factory.test_utils import TestClient


class TestSecurity:
    def test_sql_injection_prevention(self):
        """Test SQL injection prevention"""
        client = TestClient()
        # Attempt SQL injection through the text input
        response = client.post(
            "/v1/capabilities/cap_text_classification/invoke",
            json={
                "input": {
                    "text": "'; DROP TABLE capabilities; --"
                }
            }
        )
        # The payload should be treated as plain text, never executed
        assert response.status_code in [200, 400]

    def test_xss_prevention(self):
        """Test XSS prevention"""
        client = TestClient()
        response = client.post(
            "/v1/capabilities/cap_text_classification/invoke",
            json={
                "input": {
                    "text": "<script>alert('XSS')</script>"
                }
            }
        )
        # The response should never echo unsanitized markup
        assert "<script>" not in response.text

    def test_rate_limiting(self):
        """Test rate limiting enforcement"""
        client = TestClient()
        # Exceed the documented limit of 100 requests/minute
        for i in range(150):
            response = client.post(
                "/v1/capabilities/cap_text_classification/invoke",
                json={"input": {"text": f"Request {i}"}}
            )
            if i >= 100:
                assert response.status_code == 429  # Too Many Requests
```
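Asserting a 429 at exactly the 101st request assumes the limiter counts requests precisely within the test's timing window. A less timing-sensitive variant counts throttled responses over the whole burst; a sketch:

```python
# A less flaky rate-limit check: count 429s across the burst instead
# of assuming the limiter trips at exactly request 101.
from bdr_agent_factory.test_utils import TestClient


def test_rate_limiting_burst():
    client = TestClient()
    statuses = [
        client.post(
            "/v1/capabilities/cap_text_classification/invoke",
            json={"input": {"text": f"Request {i}"}},
        ).status_code
        for i in range(150)
    ]
    # With a 100/minute limit, a burst of 150 should see some throttling.
    assert statuses.count(429) >= 1
    assert all(code in (200, 429) for code in statuses)
```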
## 8. Test Data Management

### Test Data Sets
```python
# tests/fixtures/test_data.py

CLAIM_DESCRIPTIONS = [
    {
        "text": "Water damage to basement after heavy rain",
        "expected_class": "property_damage",
        "min_confidence": 0.8
    },
    {
        "text": "Rear-end collision on highway",
        "expected_class": "auto_accident",
        "min_confidence": 0.85
    },
    {
        "text": "Slip and fall in grocery store",
        "expected_class": "liability",
        "min_confidence": 0.75
    }
]

FRAUD_SCENARIOS = [
    {
        "claim_amount": 100000,
        "claimant_history": {"previous_claims": 10},
        "expected_risk": "high"
    },
    {
        "claim_amount": 5000,
        "claimant_history": {"previous_claims": 0},
        "expected_risk": "low"
    }
]
```
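These datasets plug directly into `pytest.mark.parametrize`, so each scenario becomes its own test case. The sketch below reuses the `classifier` fixture from section 1 and assumes the fixtures module is importable as `tests.fixtures.test_data`:

```python
# Driving the classification tests from the shared fixture data above.
import pytest

from tests.fixtures.test_data import CLAIM_DESCRIPTIONS


@pytest.mark.parametrize("case", CLAIM_DESCRIPTIONS, ids=lambda c: c["expected_class"])
def test_classification_matches_fixture(classifier, case):
    result = classifier.classify(text=case["text"])
    assert result.predicted_class == case["expected_class"]
    assert result.confidence >= case["min_confidence"]
```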
## 9. Continuous Integration

### GitHub Actions Workflow
```yaml
# .github/workflows/test.yml
name: Test Suite

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install -r requirements-test.txt

      - name: Run unit tests
        run: pytest tests/unit --cov=bdr_agent_factory --cov-report=xml

      - name: Run integration tests
        run: pytest tests/integration

      - name: Run compliance tests
        run: pytest tests/compliance

      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

      - name: Run security scan
        run: bandit -r bdr_agent_factory/
```
## 10. Test Reporting

### Coverage Report

```bash
# Generate an HTML coverage report
pytest --cov=bdr_agent_factory --cov-report=html

# View the report
open htmlcov/index.html
```
### Test Metrics Dashboard
- Test Coverage: Target 90%+
- Test Execution Time: < 5 minutes for full suite
- Flaky Test Rate: < 1%
- Test Pass Rate: > 99%
## Best Practices
- Write tests first (TDD approach)
- Keep tests independent (no shared state)
- Use descriptive test names (test_should_classify_property_damage_correctly)
- Mock external dependencies (APIs, databases)
- Test edge cases (empty input, max length, special characters)
- Maintain test data (version control test datasets)
- Run tests in CI/CD (automated on every commit)
- Monitor test performance (identify slow tests)
- Review test coverage (ensure critical paths covered)
- Update tests with code (keep tests in sync)
## Test Execution

```bash
# Run all tests
pytest

# Run a specific test category
pytest tests/unit
pytest tests/integration
pytest tests/e2e

# Run with coverage
pytest --cov=bdr_agent_factory

# Run a specific test file
pytest tests/unit/test_text_classification.py

# Run a specific test
pytest tests/unit/test_text_classification.py::TestTextClassification::test_basic_classification

# Run with verbose output
pytest -v

# Run in parallel (requires the pytest-xdist plugin)
pytest -n auto
```
## Support

For testing support:

- Documentation: https://docs.bdragentfactory.com/testing
- Email: qa@bdragentfactory.com