abc123 / crossword-app /backend-py /test_difficulty_softmax.py
vimalk78's picture
feat: implement difficulty-aware word selection with frequency percentiles
676533d
#!/usr/bin/env python3
"""
Test script demonstrating difficulty-aware softmax selection with frequency percentiles.
This script shows how the extended softmax approach incorporates both semantic similarity
and word frequency percentiles to create difficulty-aware probability distributions.
"""
import os
import sys
import numpy as np
# Put the sibling ``src`` directory at the front of the import path; the
# functions below import ``services.thematic_word_service``, which is
# presumably located there.
_SRC_DIR = os.path.join(os.path.dirname(__file__), 'src')
sys.path.insert(0, _SRC_DIR)
def test_difficulty_aware_selection():
    """Print the words selected for one theme at every difficulty level.

    Sets the softmax-related environment variables, builds and initializes a
    ``ThematicWordService``, echoes the configuration values the service
    exposes, and then requests ten words for the theme "animals" at easy,
    medium and hard difficulty.  Each returned word is printed with its
    similarity, its entry in ``service.word_percentiles`` (0.0 when missing)
    and its tier.  Nothing is asserted; the output is meant to be read.
    """
    print("πŸ§ͺ Testing difficulty-aware softmax selection...")

    # Environment is populated before the service module is imported and the
    # service object is constructed.
    os.environ.update({
        'SIMILARITY_TEMPERATURE': '0.7',
        'USE_SOFTMAX_SELECTION': 'true',
        'DIFFICULTY_WEIGHT': '0.3',
    })

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Echo what the service actually picked up.
    print("βœ… Configuration:")
    print(f" Temperature: {svc.similarity_temperature}")
    print(f" Softmax enabled: {svc.use_softmax_selection}")
    print(f" Difficulty weight: {svc.difficulty_weight}")

    topic = "animals"
    print(f"\n🎯 Testing theme: '{topic}' across difficulty levels")

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        picks = svc.generate_thematic_words([topic], num_words=10, difficulty=level)

        print(" Selected words:")
        for name, score, tier in picks:
            # Percentiles are looked up by lower-cased word.
            pct = svc.word_percentiles.get(name.lower(), 0.0)
            print(f" {name}: similarity={score:.3f}, percentile={pct:.3f} ({tier})")

    print("\nβœ… Difficulty-aware selection test completed!")
def test_composite_scoring():
    """Print a fixed word list ranked by composite score per difficulty.

    Sets ``DIFFICULTY_WEIGHT`` to 0.4, builds and initializes a
    ``ThematicWordService``, and for each of easy/medium/hard calls the
    service's ``_compute_composite_score`` on five hand-picked
    (word, similarity) pairs.  The pairs are printed in descending composite
    order together with their similarity and percentile.  Nothing is
    asserted.
    """
    print("\nπŸ§ͺ Testing composite scoring function...")

    os.environ['DIFFICULTY_WEIGHT'] = '0.4'  # Higher weight for demonstration

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Hand-written sample: (word, similarity) with differing frequency profiles.
    samples = [
        ("CAT", 0.8),        # Common word, high similarity
        ("ELEPHANT", 0.9),   # Moderately common, very high similarity
        ("QUETZAL", 0.7),    # Rare word, good similarity
        ("DOG", 0.75),       # Very common, good similarity
        ("PLATYPUS", 0.85),  # Rare word, high similarity
    ]

    def build_row(name, sim, level):
        # Composite score is computed before the percentile lookup, matching
        # the order in which the service is touched.
        combined = svc._compute_composite_score(sim, name, level)
        pct = svc.word_percentiles.get(name.lower(), 0.0)
        return name, sim, pct, combined

    print(f"🎯 Testing composite scoring with difficulty weight: {svc.difficulty_weight}")

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        # Highest composite score first.
        ranking = sorted(
            (build_row(name, sim, level) for name, sim in samples),
            key=lambda row: row[3],
            reverse=True,
        )

        print(" Word ranking by composite score:")
        for name, sim, pct, combined in ranking:
            print(f" {name}: similarity={sim:.3f}, percentile={pct:.3f}, composite={combined:.3f}")
def test_probability_distributions():
    """Estimate and print how often each candidate is picked per difficulty.

    Sets ``SIMILARITY_TEMPERATURE`` and ``DIFFICULTY_WEIGHT``, builds and
    initializes a ``ThematicWordService``, and for each of easy/medium/hard
    calls the service's ``_softmax_weighted_selection`` 100 times asking for
    three words out of six hand-written candidates.  For every candidate the
    fraction of trials in which it was returned is printed next to its
    percentile.  Nothing is asserted.
    """
    print("\nπŸ§ͺ Testing probability distributions across difficulties...")

    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Hand-written candidate pool with varied frequency tiers.
    pool = [
        {"word": "CAT", "similarity": 0.8, "tier": "tier_3_very_common"},
        {"word": "DOG", "similarity": 0.75, "tier": "tier_2_extremely_common"},
        {"word": "ELEPHANT", "similarity": 0.9, "tier": "tier_6_moderately_common"},
        {"word": "TIGER", "similarity": 0.85, "tier": "tier_7_somewhat_uncommon"},
        {"word": "QUETZAL", "similarity": 0.7, "tier": "tier_9_rare"},
        {"word": "PLATYPUS", "similarity": 0.8, "tier": "tier_10_very_rare"},
    ]

    print("🎯 Analyzing selection probability distributions:")

    trials = 100

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        # word -> number of trials in which it was returned
        tally = {}
        for _ in range(trials):
            # NOTE(review): only the list is copied; the candidate dicts are
            # shared across trials. Harmless if the service does not modify
            # them, which cannot be confirmed from this file.
            drawn = svc._softmax_weighted_selection(
                list(pool),
                num_words=3,
                difficulty=level,
            )
            for item in drawn:
                key = item["word"]
                if key in tally:
                    tally[key] += 1
                else:
                    tally[key] = 1

        # Report in the order the candidates were declared.
        print(" Selection probabilities:")
        for item in pool:
            name = item["word"]
            share = tally.get(name, 0) / trials
            pct = svc.word_percentiles.get(name.lower(), 0.0)
            print(f" {name}: {share:.2f} (percentile: {pct:.3f})")
def test_environment_configuration():
    """Print composite scores under several ``DIFFICULTY_WEIGHT`` settings.

    For each scenario the configured environment variable(s) are exported,
    any cached ``services.thematic_word_service`` module is dropped from
    ``sys.modules`` so the import statement runs again, and a new
    ``ThematicWordService`` is constructed.  The weight the service reports
    and the composite score of two sample words are printed.  Nothing is
    asserted.

    Every environment variable overridden here is put back to its previous
    value (or removed if it was unset) when the function exits, including
    when a scenario raises, so the last scenario's weight does not leak into
    code that runs afterwards in the same process.
    """
    print("\nπŸ§ͺ Testing environment configuration scenarios...")

    scenarios = [
        {"DIFFICULTY_WEIGHT": "0.1", "desc": "Low difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.3", "desc": "Balanced (default)"},
        {"DIFFICULTY_WEIGHT": "0.5", "desc": "High difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.8", "desc": "Frequency-dominant"},
    ]

    # (word, similarity, difficulty) samples; identical for every scenario.
    test_cases = [
        ("CAT", 0.8, "easy"),      # Common word, easy difficulty
        ("QUETZAL", 0.7, "hard"),  # Rare word, hard difficulty
    ]

    # Snapshot every variable a scenario may override ("desc" is a label,
    # not an environment variable) so it can be restored afterwards.
    overridden = {key for scenario in scenarios for key in scenario if key != "desc"}
    saved = {key: os.environ.get(key) for key in overridden}

    try:
        for scenario in scenarios:
            print(f"\nπŸ“Š Scenario: {scenario['desc']} (weight={scenario['DIFFICULTY_WEIGHT']})")

            # Set environment
            for key, value in scenario.items():
                if key != "desc":
                    os.environ[key] = value

            # Drop the cached module so the import below is executed afresh.
            if 'services.thematic_word_service' in sys.modules:
                del sys.modules['services.thematic_word_service']

            from services.thematic_word_service import ThematicWordService

            # NOTE(review): unlike the other tests in this file,
            # initialize() is not called here; the percentile lookup below
            # is guarded for that reason. Confirm this is intentional.
            service = ThematicWordService()

            print(f" Configuration loaded: difficulty_weight={service.difficulty_weight}")

            for word, sim, diff in test_cases:
                composite = service._compute_composite_score(sim, word, diff)
                # Attribute may be missing or empty on an uninitialized service.
                percentiles = getattr(service, 'word_percentiles', None) or {}
                percentile = percentiles.get(word.lower(), 0.0)
                print(f" {word} ({diff}): similarity={sim:.3f}, percentile={percentile:.3f}, composite={composite:.3f}")
    finally:
        # Undo the overrides: restore previous values, remove what was unset.
        for key, previous in saved.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous
if __name__ == "__main__":
    # Script entry point: run every demo in declaration order, then print a
    # short feature summary.
    rule = "=" * 60

    print("πŸš€ Difficulty-Aware Softmax Selection Test Suite")
    print(rule)

    for run_demo in (
        test_difficulty_aware_selection,
        test_composite_scoring,
        test_probability_distributions,
        test_environment_configuration,
    ):
        run_demo()

    print("\n" + rule)
    print("πŸŽ‰ All tests completed successfully!")

    print("\nπŸ“‹ Summary of features:")
    for bullet in (
        " β€’ Continuous frequency percentiles replace discrete tiers",
        " β€’ Difficulty-aware composite scoring (similarity + frequency alignment)",
        " β€’ Configurable difficulty weight via DIFFICULTY_WEIGHT environment variable",
        " β€’ Smooth probability distributions for easy/medium/hard selection",
        " β€’ Gaussian peaks for optimal frequency ranges per difficulty",
    ):
        print(bullet)

    print("\nπŸš€ Ready for production use with crossword backend!")