abc123 / crossword-app /backend-py /debug_full_generation.py
vimalk78's picture
Add complete Python backend with AI-powered crossword generation
b01ceb3
raw
history blame
10.5 kB
#!/usr/bin/env python3
"""
Debug the complete crossword generation process to identify display/numbering issues.
"""
import asyncio
import sys
import json
from pathlib import Path
# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
from src.services.crossword_generator_fixed import CrosswordGeneratorFixed
async def debug_complete_generation():
    """Run one end-to-end puzzle generation against a fixed word list.

    The generator's ``_select_words`` hook is replaced with a stub so that
    every run works from the same eight words. A truthy result is passed to
    ``analyze_crossword_result``; a falsy one is reported as a failure. Any
    exception raised along the way is caught, reported, and its traceback
    printed instead of being propagated.
    """
    print("🔍 Debugging Complete Crossword Generation Process\n")

    # No vector service is supplied; word selection is stubbed out below.
    generator = CrosswordGeneratorFixed(vector_service=None)

    controlled_words = [
        {"word": "MACHINE", "clue": "Device with moving parts"},
        {"word": "COMPUTER", "clue": "Electronic device"},
        {"word": "EXPERT", "clue": "Person with specialized knowledge"},
        {"word": "SCIENCE", "clue": "Systematic study"},
        {"word": "TECHNOLOGY", "clue": "Applied science"},
        {"word": "RESEARCH", "clue": "Systematic investigation"},
        {"word": "ANALYSIS", "clue": "Detailed examination"},
        {"word": "METHOD", "clue": "Systematic approach"},
    ]

    async def fixed_word_selection(topics, difficulty, use_ai):
        """Ignore the request and always return the controlled word list."""
        return controlled_words

    # Swap the generator's word source for the deterministic stub.
    generator._select_words = fixed_word_selection

    rule = "=" * 70
    print(rule)
    print("GENERATING COMPLETE CROSSWORD")
    print(rule)

    try:
        result = await generator.generate_puzzle(["technology"], "medium", use_ai=False)
        if not result:
            print("❌ Crossword generation failed - returned None")
        else:
            print("✅ Crossword generation successful!")
            # Inspect the whole result for display/numbering problems.
            analyze_crossword_result(result)
    except Exception as error:
        print(f"❌ Crossword generation failed with error: {error}")
        import traceback
        traceback.print_exc()
def analyze_crossword_result(result):
    """Print a report on a generated puzzle and run every consistency check.

    Args:
        result: Mapping read with ``.get`` for the keys ``"metadata"``,
            ``"grid"`` and ``"clues"``; a missing key falls back to an
            empty dict or list.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("CROSSWORD RESULT ANALYSIS")
    print(rule)

    # Metadata is dumped verbatim, one key per line.
    metadata = result.get("metadata", {})
    print("Metadata:")
    for name, value in metadata.items():
        print(f" {name}: {value}")

    # Grid size and a coordinate-labelled rendering.
    grid = result.get("grid", [])
    row_count = len(grid)
    col_count = len(grid[0]) if grid else 0
    print(f"\nGrid dimensions: {row_count}x{col_count}")
    print("\nGrid layout:")
    print_numbered_grid(grid)

    # Every clue is listed with the fields the checks below rely on.
    clues = result.get("clues", [])
    print(f"\nNumber of clues generated: {len(clues)}")
    print("\nClue analysis:")
    for ordinal, clue in enumerate(clues, start=1):
        print(f" Clue {ordinal}:")
        print(f" Number: {clue.get('number', 'MISSING')}")
        print(f" Word: {clue.get('word', 'MISSING')}")
        print(f" Direction: {clue.get('direction', 'MISSING')}")
        print(f" Position: {clue.get('position', 'MISSING')}")
        print(f" Text: {clue.get('text', 'MISSING')}")

    # Run the three diagnostics in a fixed order.
    print("\n" + rule)
    print("ISSUE DETECTION")
    print(rule)
    check_word_boundary_consistency(grid, clues)
    check_numbering_consistency(clues)
    check_grid_word_alignment(grid, clues)
def print_numbered_grid(grid):
    """Print the grid with column and row indices for visual inspection.

    Args:
        grid: Sequence of rows, each indexable by column. The width is
            taken from the first row, so the grid is assumed rectangular.
    """
    if not grid:
        print(" Empty grid")
        return

    width = len(grid[0])

    # Header line: each column index right-aligned in two characters.
    header = "".join(f"{col_idx:2d}" for col_idx in range(width))
    print(" " + header)

    # One line per row: the row index, then every cell preceded by a space.
    for row_idx, row in enumerate(grid):
        cells = "".join(
            " ." if row[col_idx] == "." else f" {row[col_idx]}"
            for col_idx in range(width)
        )
        print(f" {row_idx:2d}: " + cells)
def check_word_boundary_consistency(grid, clues):
    """Report clues whose word differs from the letters found in the grid.

    For each clue the letters are read from the grid at the clue's start
    position, in the clue's direction, for the length of the clue's word.
    Clues lacking a word, position or direction, and clues with a negative
    (or absent) row/col, are reported without consulting the grid.

    Args:
        grid: The puzzle grid, passed through to ``extract_word_from_grid``.
        clues: Iterable of dicts with ``"word"``, ``"position"`` (a dict
            holding ``"row"`` and ``"col"``) and ``"direction"``.
    """
    print("Checking word boundary consistency:")
    problems = []

    for entry in clues:
        expected = entry.get("word", "")
        start = entry.get("position", {})
        orientation = entry.get("direction", "")

        # All three fields must be present and non-empty to check the clue.
        if not (expected and start and orientation):
            problems.append(f"Incomplete clue data: {entry}")
            continue

        start_row = start.get("row", -1)
        start_col = start.get("col", -1)
        if start_row < 0 or start_col < 0:
            problems.append(f"Invalid position for word '{expected}': {start}")
            continue

        # Compare the clue's word against what the grid actually holds.
        found = extract_word_from_grid(grid, start_row, start_col, orientation, len(expected))
        if found != expected:
            problems.append(f"Mismatch for '{expected}' at ({start_row}, {start_col}) {orientation}: grid shows '{found}'")

    if not problems:
        print(" ✅ All words match grid positions")
        return

    print(" ❌ Issues found:")
    for problem in problems:
        print(f" {problem}")
def extract_word_from_grid(grid, row, col, direction, expected_length):
    """Read ``expected_length`` cells from the grid starting at (row, col).

    Args:
        grid: Sequence of rows of single-character strings. The width is
            taken from the first row, so the grid is assumed rectangular.
        row: Zero-based start row.
        col: Zero-based start column.
        direction: ``"across"`` reads rightwards, ``"down"`` reads
            downwards. Any other value yields an empty string.
        expected_length: Number of cells to read.

    Returns:
        The concatenated cells. If the start lies outside the grid the
        marker ``"OUT_OF_BOUNDS"`` is returned; if the grid edge is reached
        before ``expected_length`` cells were read, the letters collected so
        far are returned with ``"TRUNCATED"`` appended.
    """
    # Negative indices must be rejected explicitly: Python would otherwise
    # wrap them around and read from the opposite edge of the grid. Testing
    # them first also keeps grid[0] from being touched on an empty grid.
    if row < 0 or col < 0 or row >= len(grid) or col >= len(grid[0]):
        return "OUT_OF_BOUNDS"

    height = len(grid)
    width = len(grid[0])

    if direction == "across":
        stop = min(col + expected_length, width)
        cells = [grid[row][c] for c in range(col, stop)]
    elif direction == "down":
        stop = min(row + expected_length, height)
        cells = [grid[r][col] for r in range(row, stop)]
    else:
        # Unknown direction: nothing is read.
        return ""

    word = "".join(cells)
    # Fewer cells than requested means the grid edge cut the word short.
    if len(cells) < expected_length:
        return word + "TRUNCATED"
    return word
def check_numbering_consistency(clues):
    """Report duplicate, missing or unusable clue numbers.

    Clues whose ``"number"`` is absent or not an integer are counted and
    reported on their own line; they are kept out of the duplicate and gap
    analysis so that a placeholder value cannot produce misleading
    "missing number" reports or break ``min``/``max`` on mixed types.

    Args:
        clues: Iterable of dicts, each expected to carry an integer under
            the ``"number"`` key.
    """
    print("\nChecking numbering consistency:")
    issues = []

    # Split usable integer numbers from clues that have none.
    numbers = []
    unnumbered = 0
    for clue in clues:
        number = clue.get("number")
        if isinstance(number, int) and not isinstance(number, bool):
            numbers.append(number)
        else:
            unnumbered += 1
    if unnumbered:
        issues.append(f"{unnumbered} clue(s) have a missing or non-integer number")

    # NOTE(review): conventional crossword numbering lets an across and a
    # down clue share a number when both start in the same cell; this check
    # flags any repeat regardless of direction - confirm that the generator
    # is meant to number every word uniquely.
    present = set(numbers)
    if len(numbers) != len(present):
        issues.append("Duplicate clue numbers found")

    # Gaps between the smallest and largest number actually used.
    if present:
        missing = set(range(min(present), max(present) + 1)) - present
        if missing:
            issues.append(f"Missing numbers: {sorted(missing)}")

    if issues:
        print(" ❌ Numbering issues:")
        for issue in issues:
            print(f" {issue}")
    else:
        print(" ✅ Numbering is consistent")
def check_grid_word_alignment(grid, clues):
    """Report letter runs in the grid that no clue accounts for.

    Every horizontal and vertical run of two or more letters is looked up
    by its start cell and direction among the clues. A run with no clue at
    that start, or whose letters differ from the clue's word, is reported.

    Args:
        grid: The puzzle grid, passed to the two sequence finders.
        clues: Iterable of dicts with ``"position"`` (holding ``"row"`` and
            ``"col"``), ``"direction"`` and ``"word"``.
    """
    print("\nChecking grid word alignment:")

    # Collect every run of letters in both directions.
    across_runs = find_horizontal_sequences(grid)
    down_runs = find_vertical_sequences(grid)
    print(f" Found {len(across_runs)} horizontal sequences")
    print(f" Found {len(down_runs)} vertical sequences")

    # Index the clue words by (row, col, direction) of their first cell.
    expected_words = {}
    for clue in clues:
        start = clue.get("position", {})
        anchor = (start.get("row"), start.get("col"), clue.get("direction"))
        expected_words[anchor] = clue.get("word", "")

    problems = []

    # Horizontal runs arrive as (row, start_col, letters).
    for row, first_col, letters in across_runs:
        anchor = (row, first_col, "across")
        if anchor in expected_words:
            if expected_words[anchor] != letters:
                problems.append(f"Mismatch: clue says '{expected_words[anchor]}' but grid shows '{letters}' at ({row}, {first_col})")
        else:
            problems.append(f"Unaccounted horizontal sequence: '{letters}' at ({row}, {first_col})")

    # Vertical runs arrive as (col, start_row, letters) - column first.
    for col, first_row, letters in down_runs:
        anchor = (first_row, col, "down")
        if anchor in expected_words:
            if expected_words[anchor] != letters:
                problems.append(f"Mismatch: clue says '{expected_words[anchor]}' but grid shows '{letters}' at ({first_row}, {col})")
        else:
            problems.append(f"Unaccounted vertical sequence: '{letters}' at ({first_row}, {col})")

    if not problems:
        print(" ✅ All words are properly aligned")
        return

    print(" ❌ Alignment issues found:")
    for problem in problems:
        print(f" {problem}")
def find_horizontal_sequences(grid):
    """Collect every left-to-right run of two or more non-"." cells.

    Args:
        grid: Sequence of rows of strings. The width is taken from the
            first row, so the grid is assumed rectangular.

    Returns:
        A list of ``(row, start_col, letters)`` tuples in row-major order.
    """
    runs = []
    for row_idx, row in enumerate(grid):
        letters = ""
        run_start = None
        for col_idx in range(len(grid[0])):
            cell = row[col_idx]
            if cell == ".":
                # An empty cell closes the current run; single letters are dropped.
                if len(letters) > 1:
                    runs.append((row_idx, run_start, letters))
                letters = ""
                run_start = None
                continue
            if run_start is None:
                run_start = col_idx
            letters += cell
        # A run that touches the right edge is closed by the end of the row.
        if len(letters) > 1:
            runs.append((row_idx, run_start, letters))
    return runs
def find_vertical_sequences(grid):
    """Collect every top-to-bottom run of two or more non-"." cells.

    Args:
        grid: Sequence of rows of strings. The width is taken from the
            first row, so the grid is assumed rectangular. An empty grid
            is accepted and yields no sequences.

    Returns:
        A list of ``(col, start_row, letters)`` tuples, ordered column by
        column. Note that the column comes first in each tuple.
    """
    # Without this guard an empty grid raised IndexError on grid[0], even
    # though the horizontal finder handles an empty grid without error.
    if not grid:
        return []

    sequences = []
    for c in range(len(grid[0])):
        current_word = ""
        start_row = None
        for r, row in enumerate(grid):
            cell = row[c]
            if cell != ".":
                if start_row is None:
                    start_row = r
                current_word += cell
            else:
                # An empty cell closes the current run; single letters are dropped.
                if len(current_word) > 1:
                    sequences.append((c, start_row, current_word))
                current_word = ""
                start_row = None
        # A run that touches the bottom edge is closed by the end of the column.
        if len(current_word) > 1:
            sequences.append((c, start_row, current_word))
    return sequences
if __name__ == "__main__":
    # Entry point when executed as a script: drive the async debug routine
    # to completion.
    asyncio.run(debug_complete_generation())