Spaces:

vimalk78
/

abc123

Running

App Files Files Community

abc123 / crossword-app /backend-py /debug_full_generation.py

vimalk78

Add complete Python backend with AI-powered crossword generation

b01ceb3 3 months ago

raw

history blame

10.5 kB

	#!/usr/bin/env python3
	"""
	Debug the complete crossword generation process to identify display/numbering issues.
	"""

	import asyncio
	import sys
	import json
	from pathlib import Path

	# Add project root to path
	project_root = Path(__file__).parent
	sys.path.insert(0, str(project_root))

	from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

	async def debug_complete_generation():
	    """Run one end-to-end crossword generation with fixed words and report on it.

	    The generator's ``_select_words`` attribute is replaced with a stub
	    coroutine that always returns the same eight word/clue entries. A truthy
	    puzzle is passed to ``analyze_crossword_result``; a falsy one is reported
	    as a failure. Any exception raised inside the ``try`` block (generation or
	    analysis) is printed together with its traceback instead of propagating.
	    """
	    print("🔍 Debugging Complete Crossword Generation Process\n")

	    # No vector service is supplied (per the original note: use static words).
	    generator = CrosswordGeneratorFixed(vector_service=None)

	    # Controlled vocabulary, expanded into the {"word", "clue"} dict shape.
	    word_clue_pairs = (
	        ("MACHINE", "Device with moving parts"),
	        ("COMPUTER", "Electronic device"),
	        ("EXPERT", "Person with specialized knowledge"),
	        ("SCIENCE", "Systematic study"),
	        ("TECHNOLOGY", "Applied science"),
	        ("RESEARCH", "Systematic investigation"),
	        ("ANALYSIS", "Detailed examination"),
	        ("METHOD", "Systematic approach"),
	    )
	    test_words = [{"word": word, "clue": clue} for word, clue in word_clue_pairs]

	    async def _fixed_word_selection(topics, difficulty, use_ai):
	        # Arguments are ignored on purpose: the word list never varies.
	        return test_words

	    # Assigned on the instance, so the stub is called without ``self``.
	    generator._select_words = _fixed_word_selection

	    rule = "=" * 70
	    print(rule)
	    print("GENERATING COMPLETE CROSSWORD")
	    print(rule)

	    try:
	        puzzle = await generator.generate_puzzle(["technology"], "medium", use_ai=False)

	        if not puzzle:
	            print("❌ Crossword generation failed - returned None")
	        else:
	            print("✅ Crossword generation successful!")
	            # Analysis stays inside the try so its errors are reported too.
	            analyze_crossword_result(puzzle)
	    except Exception as exc:
	        print(f"❌ Crossword generation failed with error: {exc}")
	        import traceback
	        traceback.print_exc()

	def analyze_crossword_result(result):
	    """Print a full report on a generated crossword and run the issue checks.

	    Args:
	        result: Mapping read via ``.get`` for the keys "metadata", "grid" and
	            "clues"; absent keys fall back to an empty dict or list.

	    The report lists the metadata entries, the grid size and layout, every
	    clue's fields ("MISSING" where a field is absent), and then the output
	    of the three consistency checks.
	    """
	    rule = "=" * 70

	    print("\n" + rule)
	    print("CROSSWORD RESULT ANALYSIS")
	    print(rule)

	    # Metadata section.
	    metadata = result.get("metadata", {})
	    print("Metadata:")
	    for name, value in metadata.items():
	        print(f" {name}: {value}")

	    # Grid section: width is taken from the first row, or 0 with no rows.
	    grid = result.get("grid", [])
	    column_count = len(grid[0]) if grid else 0
	    print(f"\nGrid dimensions: {len(grid)}x{column_count}")

	    print("\nGrid layout:")
	    print_numbered_grid(grid)

	    # Clue section.
	    clues = result.get("clues", [])
	    print(f"\nNumber of clues generated: {len(clues)}")

	    print("\nClue analysis:")
	    for position, clue in enumerate(clues, start=1):
	        print(f" Clue {position}:")
	        print(f" Number: {clue.get('number', 'MISSING')}")
	        print(f" Word: {clue.get('word', 'MISSING')}")
	        print(f" Direction: {clue.get('direction', 'MISSING')}")
	        print(f" Position: {clue.get('position', 'MISSING')}")
	        print(f" Text: {clue.get('text', 'MISSING')}")

	    # Issue detection section.
	    print("\n" + rule)
	    print("ISSUE DETECTION")
	    print(rule)

	    check_word_boundary_consistency(grid, clues)
	    check_numbering_consistency(clues)
	    check_grid_word_alignment(grid, clues)

	def print_numbered_grid(grid):
	    """Print the grid with column and row indices for analysis.

	    Args:
	        grid: Sequence of rows; each row is a sequence of cell strings.
	            Every cell is printed as-is, preceded by one space.

	    Prints " Empty grid" and returns when the grid has no rows. Otherwise
	    prints one header line of column indices (taken from the first row's
	    length) followed by one line per row, labelled with its row index.
	    """
	    if not grid:
	        print(" Empty grid")
	        return

	    # Row labels are rendered as " {r:2d}: ", so the header is padded by the
	    # same width; otherwise the column indices sit left of their cells.
	    header_pad = " " * len(f" {0:2d}: ")
	    print(header_pad + "".join(f"{c:2d}" for c in range(len(grid[0]))))

	    # Each cell takes two characters (space + cell), matching the 2-wide
	    # column indices above. Iterating the row itself avoids an IndexError
	    # when a row is shorter than the first one.
	    for r, row in enumerate(grid):
	        print(f" {r:2d}: " + "".join(f" {cell}" for cell in row))

	def check_word_boundary_consistency(grid, clues):
	    """Verify that each clue's word is what the grid holds at its position.

	    Args:
	        grid: The puzzle grid, passed through to ``extract_word_from_grid``.
	        clues: Iterable of clue dicts read for "word", "position" (a dict
	            with "row"/"col") and "direction".

	    Prints either a success line or the list of problems found. A clue with
	    any of the three fields empty/missing, or with a negative/missing row or
	    column, is reported and skipped without consulting the grid.
	    """
	    print("Checking word boundary consistency:")

	    problems = []

	    for entry in clues:
	        answer = entry.get("word", "")
	        where = entry.get("position", {})
	        orientation = entry.get("direction", "")

	        if not (answer and where and orientation):
	            problems.append(f"Incomplete clue data: {entry}")
	            continue

	        # -1 stands in for a coordinate that is absent from the position dict.
	        r = where.get("row", -1)
	        c = where.get("col", -1)
	        if r < 0 or c < 0:
	            problems.append(f"Invalid position for word '{answer}': {where}")
	            continue

	        # Read back as many cells as the clue's word is long.
	        on_grid = extract_word_from_grid(grid, r, c, orientation, len(answer))
	        if on_grid == answer:
	            continue
	        problems.append(
	            f"Mismatch for '{answer}' at ({r}, {c}) {orientation}: grid shows '{on_grid}'"
	        )

	    if not problems:
	        print(" ✅ All words match grid positions")
	        return

	    print(" ❌ Issues found:")
	    for problem in problems:
	        print(f" {problem}")

	def extract_word_from_grid(grid, row, col, direction, expected_length):
	    """Read up to ``expected_length`` cells from the grid starting at (row, col).

	    Args:
	        grid: Sequence of rows of cell strings; the width is taken from the
	            first row.
	        row: Starting row index.
	        col: Starting column index.
	        direction: "across" reads rightwards, "down" reads downwards.
	        expected_length: Number of cells to read.

	    Returns:
	        The concatenated cells. If the start lies outside the grid the
	        literal "OUT_OF_BOUNDS" is returned; if the grid edge is reached
	        before ``expected_length`` cells were read, the partial word is
	        returned with "TRUNCATED" appended. Any other ``direction`` yields
	        an empty string.
	    """
	    # Negative indices would otherwise wrap around via Python's negative
	    # indexing and read cells from the opposite edge, so reject them too.
	    if row < 0 or col < 0 or row >= len(grid) or col >= len(grid[0]):
	        return "OUT_OF_BOUNDS"

	    if direction == "across":  # horizontal
	        room = len(grid[0]) - col
	        stop = min(col + expected_length, len(grid[0]))
	        cells = [grid[row][c] for c in range(col, stop)]
	    elif direction == "down":  # vertical
	        room = len(grid) - row
	        stop = min(row + expected_length, len(grid))
	        cells = [grid[r][col] for r in range(row, stop)]
	    else:
	        return ""

	    word = "".join(cells)
	    if expected_length > room:
	        # The word would run past the edge: flag what could be read.
	        return word + "TRUNCATED"
	    return word

	def check_numbering_consistency(clues):
	    """Check that clue numbers are present, unique and form a gap-free run.

	    Args:
	        clues: Iterable of clue dicts; each is read for its "number" key.

	    Prints either a success line or the list of numbering problems:
	    clues lacking an integer number, duplicate numbers, and numbers absent
	    from the range between the smallest and largest number seen.
	    """
	    print("\nChecking numbering consistency:")

	    issues = []

	    # Clues without a usable number are counted separately instead of being
	    # replaced by a -1 sentinel, which would drag the minimum down and make
	    # the gap check report numbers (0, ...) that were never expected.
	    numbers = []
	    unnumbered = 0
	    for clue in clues:
	        number = clue.get("number")
	        if isinstance(number, int):
	            numbers.append(number)
	        else:
	            unnumbered += 1
	    if unnumbered:
	        issues.append(f"{unnumbered} clue(s) missing an integer 'number'")

	    # Check for duplicate numbers.
	    # NOTE(review): if across/down clues starting in the same cell are meant
	    # to share a number, this flags them too - confirm against the generator.
	    if len(numbers) != len(set(numbers)):
	        issues.append("Duplicate clue numbers found")

	    # Check for gaps between the smallest and largest number. The full range
	    # always contains every seen number, so only "missing" can be non-empty.
	    if numbers:
	        seen = set(numbers)
	        missing = set(range(min(seen), max(seen) + 1)) - seen
	        if missing:
	            issues.append(f"Missing numbers: {sorted(missing)}")

	    if issues:
	        print(" ❌ Numbering issues:")
	        for issue in issues:
	            print(f" {issue}")
	    else:
	        print(" ✅ Numbering is consistent")

	def check_grid_word_alignment(grid, clues):
	    """Check that every multi-letter run in the grid is backed by a clue.

	    Args:
	        grid: The puzzle grid, scanned by ``find_horizontal_sequences`` and
	            ``find_vertical_sequences``.
	        clues: Iterable of clue dicts read for "position" ("row"/"col"),
	            "direction" and "word".

	    Prints the number of runs found per orientation, then either a success
	    line or one problem per run that has no clue starting at its first cell
	    in that direction, or whose clue word differs from the run's letters.
	    """
	    print("\nChecking grid word alignment:")

	    # Gather the letter runs actually present in the grid.
	    across_runs = find_horizontal_sequences(grid)
	    down_runs = find_vertical_sequences(grid)

	    print(f" Found {len(across_runs)} horizontal sequences")
	    print(f" Found {len(down_runs)} vertical sequences")

	    # Index clue words by (row, col, direction); later clues win on a clash.
	    expected = {}
	    for clue in clues:
	        start = clue.get("position", {})
	        slot = (start.get("row"), start.get("col"), clue.get("direction"))
	        expected[slot] = clue.get("word", "")

	    problems = []

	    # Horizontal runs arrive as (row, start_col, word).
	    for row, start_col, word in across_runs:
	        slot = (row, start_col, "across")
	        if slot not in expected:
	            problems.append(f"Unaccounted horizontal sequence: '{word}' at ({row}, {start_col})")
	            continue
	        if expected[slot] != word:
	            problems.append(f"Mismatch: clue says '{expected[slot]}' but grid shows '{word}' at ({row}, {start_col})")

	    # Vertical runs arrive as (col, start_row, word), i.e. column first.
	    for col, start_row, word in down_runs:
	        slot = (start_row, col, "down")
	        if slot not in expected:
	            problems.append(f"Unaccounted vertical sequence: '{word}' at ({start_row}, {col})")
	            continue
	        if expected[slot] != word:
	            problems.append(f"Mismatch: clue says '{expected[slot]}' but grid shows '{word}' at ({start_row}, {col})")

	    if not problems:
	        print(" ✅ All words are properly aligned")
	        return

	    print(" ❌ Alignment issues found:")
	    for problem in problems:
	        print(f" {problem}")

	def find_horizontal_sequences(grid):
	    """Collect every left-to-right run of two or more non-"." cells.

	    Args:
	        grid: Sequence of rows of cell strings; the number of columns
	            scanned in each row is the length of the first row.

	    Returns:
	        A list of ``(row, start_col, word)`` tuples, in scan order.
	    """
	    found = []

	    for row_idx, row in enumerate(grid):
	        run = ""
	        run_start = None

	        for col_idx in range(len(grid[0])):
	            cell = row[col_idx]
	            if cell == ".":
	                # A "." closes the current run; single letters are dropped.
	                if len(run) > 1:
	                    found.append((row_idx, run_start, run))
	                run, run_start = "", None
	                continue
	            if run_start is None:
	                run_start = col_idx
	            run += cell

	        # A run touching the right edge has no closing ".", so flush it here.
	        if len(run) > 1:
	            found.append((row_idx, run_start, run))

	    return found

	def find_vertical_sequences(grid):
	    """Find all top-to-bottom runs of two or more non-"." cells.

	    Args:
	        grid: Sequence of rows of cell strings; the number of columns is
	            taken from the first row.

	    Returns:
	        A list of ``(col, start_row, word)`` tuples, column by column. Note
	        the column comes first, unlike the ``(row, start_col, word)`` tuples
	        produced by ``find_horizontal_sequences``. An empty grid yields an
	        empty list.
	    """
	    sequences = []

	    # With no rows there is no first row to measure; looking up grid[0]
	    # here would raise IndexError.
	    if not grid:
	        return sequences

	    for c in range(len(grid[0])):
	        current_word = ""
	        start_row = None

	        for r, row in enumerate(grid):
	            cell = row[c]
	            if cell != ".":
	                if start_row is None:
	                    start_row = r
	                current_word += cell
	                continue

	            # A "." ends the current run; keep it only if longer than one.
	            if len(current_word) > 1:
	                sequences.append((c, start_row, current_word))
	            current_word = ""
	            start_row = None

	        # Handle a run that reaches the bottom of the column.
	        if len(current_word) > 1:
	            sequences.append((c, start_row, current_word))

	    return sequences

	if __name__ == "__main__":
	    # Script entry point: drive the async debug routine to completion.
	    debug_run = debug_complete_generation()
	    asyncio.run(debug_run)