# code2-repo / test_lda_complete.py
# Deepu1965's picture
# Upload folder using huggingface_hub
# 9b1c753 verified
"""
Comprehensive test for LDARiskDiscovery compatibility with trainer.

Intended to be run directly as a script (``python3 test_lda_complete.py``).
It walks through ten numbered steps: importing ``LDARiskDiscovery``,
constructing an instance, confirming that the attributes and methods listed
in ``REQUIRED_ATTRS`` / ``REQUIRED_METHODS`` exist, and then calling each
method once on a handful of sample clauses.  The process exits with status 0
when every step completes and with status 1 as soon as any step fails.
"""
import sys
import traceback

BANNER = "=" * 60

# Attribute names that must exist on a freshly constructed instance (step 3).
REQUIRED_ATTRS = (
    'n_clusters',
    'discovered_patterns',
    'cluster_labels',
    'feature_matrix',
    'legal_indicators',
    'complexity_indicators',
)

# Names that must exist on the instance AND be callable (step 4).
REQUIRED_METHODS = (
    'discover_risk_patterns',
    'get_risk_labels',
    'get_discovered_risk_names',
    'get_topic_distribution',
    'clean_clause_text',
    'extract_risk_features',
)

# Clauses handed to discover_risk_patterns() in step 5.
SAMPLE_CLAUSES = [
    "The party shall indemnify and hold harmless all damages and losses.",
    "This agreement shall be governed by the laws of the state of California.",
    "Payment shall be made within thirty days of invoice date.",
    "The licensee must not disclose confidential information to third parties.",
    "Company agrees to comply with all applicable laws and regulations.",
]

# Clauses handed to get_risk_labels() / get_topic_distribution() in steps 8-9.
NEW_CLAUSES = [
    "Party agrees to indemnify all damages.",
    "Governed by California law.",
]


def has_method(obj, name):
    """Return True when ``obj`` has an attribute ``name`` that is callable."""
    return callable(getattr(obj, name, None))


def first_non_numeric(features, numeric_types=(int, float)):
    """Return the first ``(key, value)`` whose value is not numeric, else None.

    Args:
        features: Mapping of feature name to value.
        numeric_types: Tuple of types accepted as numeric by ``isinstance``.
    """
    for key, value in features.items():
        if not isinstance(value, numeric_types):
            return key, value
    return None


def check_required_attributes(obj, names=REQUIRED_ATTRS):
    """Print one status line per name; raise AttributeError at the first miss."""
    for attr in names:
        if not hasattr(obj, attr):
            print(f" ❌ {attr}: MISSING")
            raise AttributeError(f"Missing attribute: {attr}")
        print(f" ✅ {attr}: Present")


def check_required_methods(obj, names=REQUIRED_METHODS):
    """Print one status line per name; raise AttributeError at the first
    name that is absent or not callable."""
    for method in names:
        if not has_method(obj, method):
            print(f" ❌ {method}(): MISSING")
            raise AttributeError(f"Missing method: {method}")
        print(f" ✅ {method}(): Present")


def run_checks():
    """Execute steps 1-10 in order, letting any exception propagate.

    The project and numpy imports live here (not at module top) so that a
    missing dependency is reported by ``main()`` instead of crashing at
    import time.
    """
    from risk_discovery import LDARiskDiscovery
    import numpy as np
    print("\n✅ Step 1: Import successful")

    # Create instance
    print("\n🔧 Step 2: Creating LDARiskDiscovery instance...")
    lda = LDARiskDiscovery(n_clusters=3)
    print(" ✅ Instance created")

    # Check all required attributes
    print("\n📋 Step 3: Checking required attributes...")
    check_required_attributes(lda, REQUIRED_ATTRS)

    # Check all required methods
    print("\n🔍 Step 4: Checking required methods...")
    check_required_methods(lda, REQUIRED_METHODS)

    # Test discover_risk_patterns (return value is not inspected; only the
    # discovered_patterns attribute is read afterwards)
    print("\n🎯 Step 5: Testing discover_risk_patterns()...")
    lda.discover_risk_patterns(SAMPLE_CLAUSES)
    print(f" ✅ Discovered {len(lda.discovered_patterns)} patterns")

    # Test extract_risk_features
    print("\n⚙️ Step 6: Testing extract_risk_features()...")
    test_clause = "The party shall indemnify and hold harmless against all liability."
    features = lda.extract_risk_features(test_clause)
    print(f" ✅ Extracted {len(features)} features")
    print(" 📊 Sample features:")
    print(f" - risk_intensity: {features.get('risk_intensity', 0):.3f}")
    print(f" - obligation_strength: {features.get('obligation_strength', 0):.3f}")
    print(f" - legal_complexity: {features.get('legal_complexity', 0):.3f}")
    print(f" - liability_terms_density: {features.get('liability_terms_density', 0):.3f}")

    # Verify feature types (Python and numpy scalars both count as numeric)
    offender = first_non_numeric(features, (int, float, np.integer, np.floating))
    if offender is not None:
        key, value = offender
        print(f" ❌ Feature '{key}' has wrong type: {type(value)}")
        raise TypeError(f"Feature '{key}' must be numeric")
    print(f" ✅ All {len(features)} features are numeric")

    # Test clean_clause_text
    print("\n🧹 Step 7: Testing clean_clause_text()...")
    dirty_text = " This is a test clause with extra spaces. "
    clean_text = lda.clean_clause_text(dirty_text)
    print(f" Before: '{dirty_text}'")
    print(f" After: '{clean_text}'")
    print(" ✅ Text cleaned successfully")

    # Test get_risk_labels
    print("\n🏷️ Step 8: Testing get_risk_labels()...")
    labels = lda.get_risk_labels(NEW_CLAUSES)
    print(f" ✅ Labels: {labels}")
    print(f" ✅ Type: {type(labels)}")
    print(f" ✅ Length: {len(labels)}")

    # Test get_topic_distribution
    print("\n📊 Step 9: Testing get_topic_distribution()...")
    dist = lda.get_topic_distribution(NEW_CLAUSES)
    print(f" ✅ Distribution shape: {dist.shape}")
    print(f" ✅ Sample distribution: {dist[0]}")
    print(f" ✅ Sum per document: {dist.sum(axis=1)}")

    # Test get_discovered_risk_names
    print("\n📝 Step 10: Testing get_discovered_risk_names()...")
    names = lda.get_discovered_risk_names()
    print(f" ✅ Risk names: {names[:3]}...")
    print(f" ✅ Total names: {len(names)}")

    print("\n" + BANNER)
    print("🎉 ALL TESTS PASSED!")
    print(BANNER)
    print("\n✅ LDARiskDiscovery is fully compatible with trainer")
    print("✅ All required methods implemented")
    print("✅ All required attributes present")
    print("\n🚀 Ready to run: python3 train.py")


def main() -> int:
    """Run every check and translate the outcome into a process exit status.

    Returns:
        0 when all steps complete, 1 when any step raises.
    """
    print(BANNER)
    print("Testing LDARiskDiscovery Complete Interface")
    print(BANNER)
    try:
        run_checks()
    except ImportError as e:
        print(f"\n❌ Import error: {e}")
        print(" sklearn may not be installed")
        return 1
    except AttributeError as e:
        print(f"\n❌ Attribute error: {e}")
        print(" Missing required method or attribute")
        return 1
    except Exception as e:
        # Top-level boundary: report anything unexpected with a traceback.
        print(f"\n❌ Test failed: {e}")
        traceback.print_exc()
        return 1
    return 0


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive interpreter and is missing under ``python -S``.
    sys.exit(main())