#!/usr/bin/env python3
"""
Test script demonstrating difficulty-aware softmax selection with frequency percentiles.

This script shows how the extended softmax approach incorporates both semantic similarity
and word frequency percentiles to create difficulty-aware probability distributions.
"""

import os
import sys
import numpy as np

# Add src directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

def test_difficulty_aware_selection():
    """Print the words the service selects for one theme at each difficulty.

    Sets the softmax-related environment variables, constructs and
    initializes a ``ThematicWordService``, echoes the configuration
    attributes it exposes, and then asks for ten "animals" words at the
    easy, medium and hard levels. Every returned (word, similarity, tier)
    triple is printed together with the word's entry in
    ``service.word_percentiles`` (0.0 when the word is absent).

    The environment variables are left set when the function returns.
    """
    print("🧪 Testing difficulty-aware softmax selection...")

    # Environment is configured before the service is imported and built.
    os.environ.update({
        'SIMILARITY_TEMPERATURE': '0.7',
        'USE_SOFTMAX_SELECTION': 'true',
        'DIFFICULTY_WEIGHT': '0.3',
    })

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Echo what the service reports for its own configuration.
    print("✅ Configuration:")
    print(f"   Temperature: {svc.similarity_temperature}")
    print(f"   Softmax enabled: {svc.use_softmax_selection}")
    print(f"   Difficulty weight: {svc.difficulty_weight}")

    topic = "animals"
    print(f"\n🎯 Testing theme: '{topic}' across difficulty levels")

    for level in ("easy", "medium", "hard"):
        print(f"\n📊 Difficulty: {level.upper()}")

        # A fresh single-theme list is passed on every call.
        picks = service_words = svc.generate_thematic_words(
            [topic],
            num_words=10,
            difficulty=level,
        )

        print("   Selected words:")
        for entry, score, tier_name in picks:
            # Percentile table is keyed by lower-case word.
            pct = svc.word_percentiles.get(entry.lower(), 0.0)
            print(f"      {entry}: similarity={score:.3f}, percentile={pct:.3f} ({tier_name})")

    print("\n✅ Difficulty-aware selection test completed!")

def test_composite_scoring():
    """Print a ranking of sample words by composite score per difficulty.

    Sets ``DIFFICULTY_WEIGHT`` to 0.4, builds and initializes a
    ``ThematicWordService``, and for each of easy/medium/hard calls
    ``service._compute_composite_score(similarity, word, difficulty)`` on a
    fixed list of (word, similarity) pairs. The results are printed in
    descending composite-score order, alongside each word's entry in
    ``service.word_percentiles`` (0.0 when absent).
    """
    print("\n🧪 Testing composite scoring function...")

    # Higher weight than the other demos, for demonstration purposes.
    # NOTE(review): if the service module reads DIFFICULTY_WEIGHT at import
    # time, this override may not apply once the module is already cached
    # in sys.modules — confirm against the service implementation.
    os.environ['DIFFICULTY_WEIGHT'] = '0.4'

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Hand-picked (word, similarity) samples; the frequency remarks are the
    # author's expectations, not values read from the service.
    samples = [
        ("CAT", 0.8),       # common word, high similarity
        ("ELEPHANT", 0.9),  # moderately common, very high similarity
        ("QUETZAL", 0.7),   # rare word, good similarity
        ("DOG", 0.75),      # very common, good similarity
        ("PLATYPUS", 0.85), # rare word, high similarity
    ]

    print(f"🎯 Testing composite scoring with difficulty weight: {svc.difficulty_weight}")

    for level in ("easy", "medium", "hard"):
        print(f"\n📊 Difficulty: {level.upper()}")

        def score_row(sample):
            """Return (word, similarity, percentile, composite) for one sample."""
            term, sim_value = sample
            # Composite is computed before the percentile lookup.
            combined = svc._compute_composite_score(sim_value, term, level)
            pct = svc.word_percentiles.get(term.lower(), 0.0)
            return term, sim_value, pct, combined

        # Highest composite score first; ties keep their sample order.
        ranking = sorted(
            (score_row(sample) for sample in samples),
            key=lambda row: row[3],
            reverse=True,
        )

        print("   Word ranking by composite score:")
        for term, sim_value, pct, combined in ranking:
            print(f"      {term}: similarity={sim_value:.3f}, percentile={pct:.3f}, composite={combined:.3f}")

def test_probability_distributions():
    """Estimate and print per-word selection frequencies for each difficulty.

    Sets ``SIMILARITY_TEMPERATURE`` and ``DIFFICULTY_WEIGHT``, builds and
    initializes a ``ThematicWordService``, then for each of
    easy/medium/hard calls ``service._softmax_weighted_selection`` 100 times
    on a fixed six-candidate pool (three words per call). For every
    candidate it prints the fraction of trials in which the word was
    selected, plus its entry in ``service.word_percentiles`` (0.0 when
    absent).
    """
    print("\n🧪 Testing probability distributions across difficulties...")

    # NOTE(review): if the service module reads these at import time, the
    # values may not apply once the module is cached — confirm.
    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

    from services.thematic_word_service import ThematicWordService

    svc = ThematicWordService()
    svc.initialize()

    # Mock candidate pool with hand-assigned similarity and tier labels.
    pool = [
        {"word": "CAT", "similarity": 0.8, "tier": "tier_3_very_common"},
        {"word": "DOG", "similarity": 0.75, "tier": "tier_2_extremely_common"},
        {"word": "ELEPHANT", "similarity": 0.9, "tier": "tier_6_moderately_common"},
        {"word": "TIGER", "similarity": 0.85, "tier": "tier_7_somewhat_uncommon"},
        {"word": "QUETZAL", "similarity": 0.7, "tier": "tier_9_rare"},
        {"word": "PLATYPUS", "similarity": 0.8, "tier": "tier_10_very_rare"},
    ]

    print("🎯 Analyzing selection probability distributions:")

    trial_count = 100
    for level in ("easy", "medium", "hard"):
        print(f"\n📊 Difficulty: {level.upper()}")

        # word -> number of times it appeared in a selection
        hits = {}
        trial = 0
        while trial < trial_count:
            # Each trial gets its own shallow copy of the pool.
            chosen = svc._softmax_weighted_selection(
                list(pool),
                num_words=3,
                difficulty=level,
            )
            for item in chosen:
                name = item["word"]
                hits.setdefault(name, 0)
                hits[name] += 1
            trial += 1

        # Report in pool order; words never chosen show as 0.00.
        print("   Selection probabilities:")
        for item in pool:
            name = item["word"]
            share = hits.get(name, 0) / trial_count
            pct = svc.word_percentiles.get(name.lower(), 0.0)
            print(f"      {name}: {share:.2f} (percentile: {pct:.3f})")

def test_environment_configuration():
    """Print composite scores under several ``DIFFICULTY_WEIGHT`` settings.

    For each scenario the function writes the scenario's environment
    variables, evicts ``services.thematic_word_service`` from
    ``sys.modules`` so the following import re-executes the module
    (presumably so module-level configuration is re-read — confirm against
    the service), constructs a new ``ThematicWordService`` and prints its
    ``difficulty_weight`` plus the composite score of two sample words.

    The service is constructed but ``initialize()`` is not called here, so
    the percentile lookup is guarded and falls back to 0.0 when
    ``word_percentiles`` is missing or empty.

    Every environment variable touched by a scenario is restored to its
    prior value (or removed if it was previously unset) when the function
    exits, including when a scenario raises, so the last scenario's weight
    does not leak into whatever runs next in the same process.
    """
    print("\n🧪 Testing environment configuration scenarios...")

    scenarios = [
        {"DIFFICULTY_WEIGHT": "0.1", "desc": "Low difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.3", "desc": "Balanced (default)"},
        {"DIFFICULTY_WEIGHT": "0.5", "desc": "High difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.8", "desc": "Frequency-dominant"},
    ]

    # Snapshot every variable a scenario may overwrite; None marks "unset".
    touched_keys = {
        key for scenario in scenarios for key in scenario if key != "desc"
    }
    saved_env = {key: os.environ.get(key) for key in touched_keys}

    try:
        for scenario in scenarios:
            print(f"\n📊 Scenario: {scenario['desc']} (weight={scenario['DIFFICULTY_WEIGHT']})")

            # Apply the scenario's settings; "desc" is display-only.
            for key, value in scenario.items():
                if key != "desc":
                    os.environ[key] = value

            # Drop the cached module so the import below runs it afresh.
            if 'services.thematic_word_service' in sys.modules:
                del sys.modules['services.thematic_word_service']

            from services.thematic_word_service import ThematicWordService
            service = ThematicWordService()

            print(f"   Configuration loaded: difficulty_weight={service.difficulty_weight}")

            # (word, similarity, difficulty) samples to score.
            test_cases = [
                ("CAT", 0.8, "easy"),     # common word, easy difficulty
                ("QUETZAL", 0.7, "hard"), # rare word, hard difficulty
            ]

            for word, sim, diff in test_cases:
                composite = service._compute_composite_score(sim, word, diff)
                # The table may be absent or empty on an uninitialized service.
                percentiles = getattr(service, 'word_percentiles', None)
                percentile = percentiles.get(word.lower(), 0.0) if percentiles else 0.0
                print(f"      {word} ({diff}): similarity={sim:.3f}, percentile={percentile:.3f}, composite={composite:.3f}")
    finally:
        # Put the environment back exactly as it was found.
        for key, previous in saved_env.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous

if __name__ == "__main__":
    # Script entry point: run every demo in order, then print a summary.
    divider = "=" * 60

    print("🚀 Difficulty-Aware Softmax Selection Test Suite")
    print(divider)

    # An exception in any demo aborts the run before the success banner.
    for demo in (
        test_difficulty_aware_selection,
        test_composite_scoring,
        test_probability_distributions,
        test_environment_configuration,
    ):
        demo()

    print("\n" + divider)
    for closing_line in (
        "🎉 All tests completed successfully!",
        "\n📋 Summary of features:",
        "   • Continuous frequency percentiles replace discrete tiers",
        "   • Difficulty-aware composite scoring (similarity + frequency alignment)",
        "   • Configurable difficulty weight via DIFFICULTY_WEIGHT environment variable",
        "   • Smooth probability distributions for easy/medium/hard selection",
        "   • Gaussian peaks for optimal frequency ranges per difficulty",
        "\n🚀 Ready for production use with crossword backend!",
    ):
        print(closing_line)