Spaces:

vimalk78
/

abc123

Sleeping

App Files Files Community

abc123 / crossword-app /backend-py /test_difficulty_softmax.py

vimalk78

feat: implement difficulty-aware word selection with frequency percentiles

676533d 3 months ago

raw

history blame contribute delete

8.36 kB

	#!/usr/bin/env python3
	"""
	Test script demonstrating difficulty-aware softmax selection with frequency percentiles.

	This script shows how the extended softmax approach incorporates both semantic similarity
	and word frequency percentiles to create difficulty-aware probability distributions.
	"""

	import os
	import sys
	import numpy as np

	# Add src directory to path
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

	def test_difficulty_aware_selection():
	    """Print the words selected for one theme at each difficulty level.

	    Configures softmax selection through environment variables, builds and
	    initializes a ThematicWordService, echoes the configuration the service
	    exposes, then requests ten words for the theme at "easy", "medium" and
	    "hard" and prints each word's similarity, frequency percentile and tier.
	    """
	    print("🧪 Testing difficulty-aware softmax selection...")

	    # The variables are set before the service module is imported below.
	    os.environ.update({
	        'SIMILARITY_TEMPERATURE': '0.7',
	        'USE_SOFTMAX_SELECTION': 'true',
	        'DIFFICULTY_WEIGHT': '0.3',
	    })

	    from services.thematic_word_service import ThematicWordService

	    word_service = ThematicWordService()
	    word_service.initialize()

	    # Echo the settings the service instance reports.
	    print("✅ Configuration:")
	    print(f" Temperature: {word_service.similarity_temperature}")
	    print(f" Softmax enabled: {word_service.use_softmax_selection}")
	    print(f" Difficulty weight: {word_service.difficulty_weight}")

	    theme = "animals"
	    print(f"\n🎯 Testing theme: '{theme}' across difficulty levels")

	    for level in ("easy", "medium", "hard"):
	        print(f"\n📊 Difficulty: {level.upper()}")

	        picks = word_service.generate_thematic_words(
	            [theme],
	            num_words=10,
	            difficulty=level,
	        )

	        print(" Selected words:")
	        for entry in picks:
	            word, similarity, tier = entry
	            # Words missing from the percentile table are reported as 0.0.
	            percentile = word_service.word_percentiles.get(word.lower(), 0.0)
	            print(f" {word}: similarity={similarity:.3f}, percentile={percentile:.3f} ({tier})")

	    print("\n✅ Difficulty-aware selection test completed!")

	def test_composite_scoring():
	    """Rank a fixed word list by composite score at each difficulty level.

	    Calls the service's ``_compute_composite_score`` directly for every
	    (word, similarity) pair and prints the words ordered from highest to
	    lowest composite score, once per difficulty.
	    """
	    print("\n🧪 Testing composite scoring function...")

	    # Higher weight for demonstration
	    os.environ['DIFFICULTY_WEIGHT'] = '0.4'

	    from services.thematic_word_service import ThematicWordService

	    word_service = ThematicWordService()
	    word_service.initialize()

	    # Mock test data - words with different frequency characteristics
	    samples = [
	        ("CAT", 0.8),        # Common word, high similarity
	        ("ELEPHANT", 0.9),   # Moderately common, very high similarity
	        ("QUETZAL", 0.7),    # Rare word, good similarity
	        ("DOG", 0.75),       # Very common, good similarity
	        ("PLATYPUS", 0.85),  # Rare word, high similarity
	    ]

	    print(f"🎯 Testing composite scoring with difficulty weight: {word_service.difficulty_weight}")

	    def score_row(word, similarity, level):
	        # Composite score is computed first, then the percentile lookup.
	        composite = word_service._compute_composite_score(similarity, word, level)
	        percentile = word_service.word_percentiles.get(word.lower(), 0.0)
	        return (word, similarity, percentile, composite)

	    for level in ("easy", "medium", "hard"):
	        print(f"\n📊 Difficulty: {level.upper()}")

	        rows = [score_row(word, similarity, level) for word, similarity in samples]

	        # Highest composite score first; ties keep their input order.
	        ranked = sorted(rows, key=lambda row: row[3], reverse=True)

	        print(" Word ranking by composite score:")
	        for word, sim, perc, comp in ranked:
	            print(f" {word}: similarity={sim:.3f}, percentile={perc:.3f}, composite={comp:.3f}")

	def test_probability_distributions():
	    """Estimate how often each mock candidate is selected per difficulty.

	    For each difficulty, runs the service's ``_softmax_weighted_selection``
	    100 times asking for 3 words, counts how many trials included each
	    candidate, and prints that count divided by the number of trials
	    alongside the word's frequency percentile.
	    """
	    print("\n🧪 Testing probability distributions across difficulties...")

	    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
	    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

	    from services.thematic_word_service import ThematicWordService

	    word_service = ThematicWordService()
	    word_service.initialize()

	    # Create mock candidates with varied frequency profiles
	    candidates = [
	        {"word": "CAT", "similarity": 0.8, "tier": "tier_3_very_common"},
	        {"word": "DOG", "similarity": 0.75, "tier": "tier_2_extremely_common"},
	        {"word": "ELEPHANT", "similarity": 0.9, "tier": "tier_6_moderately_common"},
	        {"word": "TIGER", "similarity": 0.85, "tier": "tier_7_somewhat_uncommon"},
	        {"word": "QUETZAL", "similarity": 0.7, "tier": "tier_9_rare"},
	        {"word": "PLATYPUS", "similarity": 0.8, "tier": "tier_10_very_rare"},
	    ]

	    print("🎯 Analyzing selection probability distributions:")

	    num_trials = 100

	    for level in ("easy", "medium", "hard"):
	        print(f"\n📊 Difficulty: {level.upper()}")

	        # Repeat the selection and tally how often each word appears.
	        tally = {}
	        for _trial in range(num_trials):
	            # A fresh (shallow) list is handed over on every trial.
	            chosen = word_service._softmax_weighted_selection(
	                list(candidates),
	                num_words=3,
	                difficulty=level,
	            )
	            for item in chosen:
	                name = item["word"]
	                tally.setdefault(name, 0)
	                tally[name] += 1

	        # Report the per-trial inclusion rate for every candidate.
	        print(" Selection probabilities:")
	        for item in candidates:
	            name = item["word"]
	            probability = tally.get(name, 0) / num_trials
	            percentile = word_service.word_percentiles.get(name.lower(), 0.0)
	            print(f" {name}: {probability:.2f} (percentile: {percentile:.3f})")

	def test_environment_configuration():
	    """Print composite scores under several DIFFICULTY_WEIGHT settings.

	    For each scenario the environment variable is set, the service module is
	    dropped from ``sys.modules`` and re-imported, a new (un-initialized)
	    ThematicWordService is created, and the composite score of two sample
	    words is printed.

	    The environment variables touched by the scenarios are restored to their
	    previous state when the function exits, even on error, so the last
	    scenario's value does not leak into code that runs afterwards in the
	    same process.
	    """
	    print("\n🧪 Testing environment configuration scenarios...")

	    scenarios = [
	        {"DIFFICULTY_WEIGHT": "0.1", "desc": "Low difficulty influence"},
	        {"DIFFICULTY_WEIGHT": "0.3", "desc": "Balanced (default)"},
	        {"DIFFICULTY_WEIGHT": "0.5", "desc": "High difficulty influence"},
	        {"DIFFICULTY_WEIGHT": "0.8", "desc": "Frequency-dominant"},
	    ]

	    # Snapshot every variable a scenario may overwrite (None = was unset).
	    touched_keys = {key for scenario in scenarios for key in scenario if key != "desc"}
	    previous_env = {key: os.environ.get(key) for key in touched_keys}

	    try:
	        for scenario in scenarios:
	            print(f"\n📊 Scenario: {scenario['desc']} (weight={scenario['DIFFICULTY_WEIGHT']})")

	            # Set environment
	            for key, value in scenario.items():
	                if key != "desc":
	                    os.environ[key] = value

	            # Test with fresh service: force the module to be imported again.
	            sys.modules.pop('services.thematic_word_service', None)

	            from services.thematic_word_service import ThematicWordService
	            service = ThematicWordService()

	            print(f" Configuration loaded: difficulty_weight={service.difficulty_weight}")

	            # Test composite scoring for different words
	            test_cases = [
	                ("CAT", 0.8, "easy"),      # Common word, easy difficulty
	                ("QUETZAL", 0.7, "hard"),  # Rare word, hard difficulty
	            ]

	            for word, sim, diff in test_cases:
	                composite = service._compute_composite_score(sim, word, diff)
	                # initialize() is not called here, so the percentile table may
	                # be missing or empty; report 0.0 in that case.
	                percentiles = getattr(service, 'word_percentiles', None)
	                percentile = percentiles.get(word.lower(), 0.0) if percentiles else 0.0
	                print(f" {word} ({diff}): similarity={sim:.3f}, percentile={percentile:.3f}, composite={composite:.3f}")
	    finally:
	        # Put the environment back exactly as it was found.
	        for key, old_value in previous_env.items():
	            if old_value is None:
	                os.environ.pop(key, None)
	            else:
	                os.environ[key] = old_value

	if __name__ == "__main__":
	    # Script entry point: run every demo in order, then print a summary.
	    divider = "=" * 60

	    print("🚀 Difficulty-Aware Softmax Selection Test Suite")
	    print(divider)

	    demos = (
	        test_difficulty_aware_selection,
	        test_composite_scoring,
	        test_probability_distributions,
	        test_environment_configuration,
	    )
	    for run_demo in demos:
	        run_demo()

	    print("\n" + divider)
	    print("🎉 All tests completed successfully!")
	    print("\n📋 Summary of features:")
	    feature_lines = (
	        " • Continuous frequency percentiles replace discrete tiers",
	        " • Difficulty-aware composite scoring (similarity + frequency alignment)",
	        " • Configurable difficulty weight via DIFFICULTY_WEIGHT environment variable",
	        " • Smooth probability distributions for easy/medium/hard selection",
	        " • Gaussian peaks for optimal frequency ranges per difficulty",
	    )
	    for line in feature_lines:
	        print(line)
	    print("\n🚀 Ready for production use with crossword backend!")