"""
Quick Test: Improved Prompt Engineering
Test the improved prompts and validation on a few examples to see if clue quality improved.
"""

import sys
import logging
from pathlib import Path

# Put this script's directory at the front of sys.path so that modules sitting
# next to it (such as llm_clue_generator) can be imported.
sys.path.insert(0, str(Path(__file__).parent))

# The generator import is guarded: a failure is reported and recorded in
# GENERATOR_AVAILABLE so the test can bail out cleanly instead of crashing
# at import time.
try:
    from llm_clue_generator import LLMClueGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"β Import error: {e}")
    GENERATOR_AVAILABLE = False

# Configure DEBUG-level logging with timestamps and source line numbers.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)


def _clue_passes_quality_check(word, clue):
    """Return True when *clue* is non-empty, longer than 5 characters, and
    does not contain *word* (case-insensitive substring match)."""
    return bool(clue) and len(clue) > 5 and word.lower() not in clue.lower()


def test_improved_prompts():
    """Run the clue generator over a fixed list of word/topic/style cases.

    For every case the function prints the candidates returned by
    ``generator.generate_clue_candidates`` and the clue returned by
    ``generator.generate_clue``, then rates the clue with a simple heuristic
    (see ``_clue_passes_quality_check``). After all cases it prints a summary
    with the success rate and a verdict based on 70% / 40% thresholds.

    Returns:
        None. Everything is reported through ``print``. The function returns
        early when the generator module could not be imported or when
        ``generator.initialize()`` raises.
    """
    # NOTE(review): the leading glyphs in the messages below ("β", "π", ...)
    # look like mis-encoded emoji - confirm the intended characters.
    if not GENERATOR_AVAILABLE:
        print("β Cannot run test - LLM generator not available")
        return

    print("π§ͺ Testing Improved Prompt Engineering")
    print("=" * 60)

    print("π Initializing LLM clue generator...")
    generator = LLMClueGenerator()

    try:
        generator.initialize()
        print("β Generator initialized successfully")
    except Exception as e:
        # Script-level boundary: report the failure and stop, since nothing
        # below can run without an initialized generator.
        print(f"β Failed to initialize generator: {e}")
        return

    # (word, topic, clue_style) triples fed to the generator.
    test_cases = [
        ("CAT", "animals", "definition"),
        ("KITTY", "animals", "description"),
        ("MEAL", "food", "category"),
        ("HUNGER", "food", "simple"),
        ("TECH", "technology", "category"),
        ("SCIENTIST", "science", "trivia"),

        ("DOG", "animals", "definition"),
        ("PYTHON", "technology", "description"),
        ("GUITAR", "music", "category"),
    ]

    print(f"\nπ― Testing {len(test_cases)} word-topic combinations")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)

    for word, topic, style in test_cases:
        print(f"\nπ Testing: '{word}' + '{topic}' (style: {style})")
        print("-" * 40)

        try:
            candidates = generator.generate_clue_candidates(
                word=word,
                topic=topic,
                clue_style=style,
                difficulty="medium",
                num_candidates=3
            )

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f" {i}. {candidate}")

            best_clue = generator.generate_clue(
                word=word,
                topic=topic,
                clue_style=style,
                difficulty="medium"
            )

            print(f"\nπ Best clue: {best_clue}")

            if _clue_passes_quality_check(word, best_clue):
                successful_clues += 1
                print("β Quality: GOOD")
            else:
                print("β Quality: POOR")

        except Exception as e:
            # One failing case must not abort the remaining cases; it simply
            # does not count as a success.
            print(f"β Error generating clue: {e}")

    print("\n" + "=" * 60)
    print("π RESULTS SUMMARY")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"Success rate: {(successful_clues/total_tests)*100:.1f}%")

    if successful_clues >= total_tests * 0.7:
        print("π Improved prompts show significant improvement!")
    elif successful_clues >= total_tests * 0.4:
        print("π Some improvement, but may need model upgrade")
    else:
        print("β Prompts still not effective, recommend semantic template approach")


def main():
    """Script entry point: run the prompt improvement test once."""
    test_improved_prompts()


# Run the test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()