"""Comprehensive test for LDARiskDiscovery compatibility with trainer.

Smoke-test script: it imports ``LDARiskDiscovery`` from ``risk_discovery``,
checks that the attributes and methods listed below exist, and then calls
each method once on a handful of sample clauses, printing what comes back.

The checks run at module level. The process exits with status 1 if the
import fails, if an attribute/method is missing, or if any call raises;
otherwise it falls off the end (status 0).
"""

import sys
import traceback

# NOTE(review): the status glyphs in the messages below were restored from
# mis-decoded UTF-8. The check mark, wrench, target, gear, broom and label
# could be recovered from the surviving bytes; the cross mark and the
# magnifier / chart / memo / party / rocket glyphs are best guesses from
# context -- confirm against the original script if exact output matters.

print("=" * 60)
print("Testing LDARiskDiscovery Complete Interface")
print("=" * 60)

try:
    # Imports live inside the try so that a missing dependency is reported
    # by the ImportError handler at the bottom instead of a raw traceback.
    from risk_discovery import LDARiskDiscovery
    import numpy as np

    print("\n✅ Step 1: Import successful")

    # Step 2: construct the object under test.
    print("\n🔧 Step 2: Creating LDARiskDiscovery instance...")
    lda = LDARiskDiscovery(n_clusters=3)
    print(" ✅ Instance created")

    # Step 3: every name below must exist as an attribute on the instance.
    print("\n🔍 Step 3: Checking required attributes...")
    required_attrs = [
        'n_clusters',
        'discovered_patterns',
        'cluster_labels',
        'feature_matrix',
        'legal_indicators',
        'complexity_indicators',
    ]

    for attr in required_attrs:
        if hasattr(lda, attr):
            print(f" ✅ {attr}: Present")
        else:
            print(f" ❌ {attr}: MISSING")
            # Stop at the first missing attribute; handled below.
            raise AttributeError(f"Missing attribute: {attr}")

    # Step 4: every name below must exist and be callable.
    print("\n🔍 Step 4: Checking required methods...")
    required_methods = [
        'discover_risk_patterns',
        'get_risk_labels',
        'get_discovered_risk_names',
        'get_topic_distribution',
        'clean_clause_text',
        'extract_risk_features',
    ]

    for method in required_methods:
        if hasattr(lda, method) and callable(getattr(lda, method)):
            print(f" ✅ {method}(): Present")
        else:
            print(f" ❌ {method}(): MISSING")
            raise AttributeError(f"Missing method: {method}")

    # Step 5: run pattern discovery on the sample clauses. The return value
    # is not inspected; the count is read from lda.discovered_patterns.
    print("\n🎯 Step 5: Testing discover_risk_patterns()...")
    sample_clauses = [
        "The party shall indemnify and hold harmless all damages and losses.",
        "This agreement shall be governed by the laws of the state of California.",
        "Payment shall be made within thirty days of invoice date.",
        "The licensee must not disclose confidential information to third parties.",
        "Company agrees to comply with all applicable laws and regulations.",
    ]

    lda.discover_risk_patterns(sample_clauses)
    print(f" ✅ Discovered {len(lda.discovered_patterns)} patterns")

    # Step 6: feature extraction on a single clause. The result is used as a
    # mapping (.get / .items), and every value must be numeric.
    print("\n⚙️ Step 6: Testing extract_risk_features()...")
    test_clause = "The party shall indemnify and hold harmless against all liability."
    features = lda.extract_risk_features(test_clause)

    print(f" ✅ Extracted {len(features)} features")
    print(" 📊 Sample features:")
    print(f" - risk_intensity: {features.get('risk_intensity', 0):.3f}")
    print(f" - obligation_strength: {features.get('obligation_strength', 0):.3f}")
    print(f" - legal_complexity: {features.get('legal_complexity', 0):.3f}")
    print(f" - liability_terms_density: {features.get('liability_terms_density', 0):.3f}")

    # Accept Python and NumPy scalar numbers alike.
    numeric_types = (int, float, np.integer, np.floating)
    for key, value in features.items():
        if not isinstance(value, numeric_types):
            print(f" ❌ Feature '{key}' has wrong type: {type(value)}")
            raise TypeError(f"Feature '{key}' must be numeric")

    print(f" ✅ All {len(features)} features are numeric")

    # Step 7: text cleaning; before/after are printed for visual inspection.
    print("\n🧹 Step 7: Testing clean_clause_text()...")
    dirty_text = " This is a test clause with extra spaces. "
    clean_text = lda.clean_clause_text(dirty_text)
    print(f" Before: '{dirty_text}'")
    print(f" After: '{clean_text}'")
    print(" ✅ Text cleaned successfully")

    # Step 8: label clauses that were not part of the discovery input.
    print("\n🏷️ Step 8: Testing get_risk_labels()...")
    new_clauses = [
        "Party agrees to indemnify all damages.",
        "Governed by California law.",
    ]
    labels = lda.get_risk_labels(new_clauses)
    print(f" ✅ Labels: {labels}")
    print(f" ✅ Type: {type(labels)}")
    print(f" ✅ Length: {len(labels)}")

    # Step 9: topic distribution for the same clauses. The result must
    # support .shape, indexing and .sum(axis=1).
    print("\n📊 Step 9: Testing get_topic_distribution()...")
    dist = lda.get_topic_distribution(new_clauses)
    print(f" ✅ Distribution shape: {dist.shape}")
    print(f" ✅ Sample distribution: {dist[0]}")
    print(f" ✅ Sum per document: {dist.sum(axis=1)}")

    # Step 10: names of the discovered risks (only the first three shown).
    print("\n📝 Step 10: Testing get_discovered_risk_names()...")
    names = lda.get_discovered_risk_names()
    print(f" ✅ Risk names: {names[:3]}...")
    print(f" ✅ Total names: {len(names)}")

    print("\n" + "=" * 60)
    print("🎉 ALL TESTS PASSED!")
    print("=" * 60)
    print("\n✅ LDARiskDiscovery is fully compatible with trainer")
    print("✅ All required methods implemented")
    print("✅ All required attributes present")
    print("\n🚀 Ready to run: python3 train.py")

except ImportError as e:
    print(f"\n❌ Import error: {e}")
    print(" sklearn may not be installed")
    # sys.exit instead of the site-provided exit(), which is meant for the
    # interactive shell and is absent when the site module is not loaded.
    sys.exit(1)

except AttributeError as e:
    print(f"\n❌ Attribute error: {e}")
    print(" Missing required method or attribute")
    sys.exit(1)

except Exception as e:
    # Top-level boundary: report anything else with a full traceback.
    print(f"\n❌ Test failed: {e}")
    traceback.print_exc()
    sys.exit(1)