code2-repo / test_lda_complete.py

Deepu1965

Upload folder using huggingface_hub

9b1c753 verified about 2 months ago

5.28 kB

	"""
	Comprehensive test for LDARiskDiscovery compatibility with trainer
	"""

	print("=" * 60)
	print("Testing LDARiskDiscovery Complete Interface")
	print("=" * 60)

	try:
	from risk_discovery import LDARiskDiscovery
	import numpy as np

	print("\n✅ Step 1: Import successful")

	# Create instance
	print("\n🔧 Step 2: Creating LDARiskDiscovery instance...")
	lda = LDARiskDiscovery(n_clusters=3)
	print(" ✅ Instance created")

	# Check all required attributes
	print("\n📋 Step 3: Checking required attributes...")
	required_attrs = [
	'n_clusters',
	'discovered_patterns',
	'cluster_labels',
	'feature_matrix',
	'legal_indicators',
	'complexity_indicators'
	]

	for attr in required_attrs:
	if hasattr(lda, attr):
	print(f" ✅ {attr}: Present")
	else:
	print(f" ❌ {attr}: MISSING")
	raise AttributeError(f"Missing attribute: {attr}")

	# Check all required methods
	print("\n🔍 Step 4: Checking required methods...")
	required_methods = [
	'discover_risk_patterns',
	'get_risk_labels',
	'get_discovered_risk_names',
	'get_topic_distribution',
	'clean_clause_text',
	'extract_risk_features'
	]

	for method in required_methods:
	if hasattr(lda, method) and callable(getattr(lda, method)):
	print(f" ✅ {method}(): Present")
	else:
	print(f" ❌ {method}(): MISSING")
	raise AttributeError(f"Missing method: {method}")

	# Test discover_risk_patterns
	print("\n🎯 Step 5: Testing discover_risk_patterns()...")
	sample_clauses = [
	"The party shall indemnify and hold harmless all damages and losses.",
	"This agreement shall be governed by the laws of the state of California.",
	"Payment shall be made within thirty days of invoice date.",
	"The licensee must not disclose confidential information to third parties.",
	"Company agrees to comply with all applicable laws and regulations."
	]

	results = lda.discover_risk_patterns(sample_clauses)
	print(f" ✅ Discovered {len(lda.discovered_patterns)} patterns")

	# Test extract_risk_features
	print("\n⚙️ Step 6: Testing extract_risk_features()...")
	test_clause = "The party shall indemnify and hold harmless against all liability."
	features = lda.extract_risk_features(test_clause)

	print(f" ✅ Extracted {len(features)} features")
	print(f" 📊 Sample features:")
	print(f" - risk_intensity: {features.get('risk_intensity', 0):.3f}")
	print(f" - obligation_strength: {features.get('obligation_strength', 0):.3f}")
	print(f" - legal_complexity: {features.get('legal_complexity', 0):.3f}")
	print(f" - liability_terms_density: {features.get('liability_terms_density', 0):.3f}")

	# Verify feature types
	for key, value in features.items():
	if not isinstance(value, (int, float, np.integer, np.floating)):
	print(f" ❌ Feature '{key}' has wrong type: {type(value)}")
	raise TypeError(f"Feature '{key}' must be numeric")

	print(f" ✅ All {len(features)} features are numeric")

	# Test clean_clause_text
	print("\n🧹 Step 7: Testing clean_clause_text()...")
	dirty_text = " This is a test clause with extra spaces. "
	clean_text = lda.clean_clause_text(dirty_text)
	print(f" Before: '{dirty_text}'")
	print(f" After: '{clean_text}'")
	print(f" ✅ Text cleaned successfully")

	# Test get_risk_labels
	print("\n🏷️ Step 8: Testing get_risk_labels()...")
	new_clauses = [
	"Party agrees to indemnify all damages.",
	"Governed by California law."
	]
	labels = lda.get_risk_labels(new_clauses)
	print(f" ✅ Labels: {labels}")
	print(f" ✅ Type: {type(labels)}")
	print(f" ✅ Length: {len(labels)}")

	# Test get_topic_distribution
	print("\n📊 Step 9: Testing get_topic_distribution()...")
	dist = lda.get_topic_distribution(new_clauses)
	print(f" ✅ Distribution shape: {dist.shape}")
	print(f" ✅ Sample distribution: {dist[0]}")
	print(f" ✅ Sum per document: {dist.sum(axis=1)}")

	# Test get_discovered_risk_names
	print("\n📝 Step 10: Testing get_discovered_risk_names()...")
	names = lda.get_discovered_risk_names()
	print(f" ✅ Risk names: {names[:3]}...")
	print(f" ✅ Total names: {len(names)}")

	print("\n" + "=" * 60)
	print("🎉 ALL TESTS PASSED!")
	print("=" * 60)
	print("\n✅ LDARiskDiscovery is fully compatible with trainer")
	print("✅ All required methods implemented")
	print("✅ All required attributes present")
	print("\n🚀 Ready to run: python3 train.py")

	except ImportError as e:
	print(f"\n❌ Import error: {e}")
	print(" sklearn may not be installed")
	exit(1)

	except AttributeError as e:
	print(f"\n❌ Attribute error: {e}")
	print(" Missing required method or attribute")
	exit(1)

	except Exception as e:
	print(f"\n❌ Test failed: {e}")
	import traceback
	traceback.print_exc()
	exit(1)