Spaces:

Ujjwal123
/

EZ-Crossword

Running

App Files Files Community

EZ-Crossword / Utils_inf.py

Ujjwal123

copied the whole api code from django and updated the dockerfile

a04b340 6 months ago

raw history blame contribute delete

No virus

3.52 kB

	import json
	import puz
	import wordsegment
	import math
	from wordsegment import load, segment, clean
	import os
	# Load wordsegment's data tables so that segment() and wordsegment.UNIGRAMS are usable below.
	load()

	# Set of accepted words, one per line of ./words_alpha.txt (surrounding whitespace stripped).
	# The file is read inside a `with` block so the handle is closed as soon as the set is built.
	with open('./words_alpha.txt', 'r') as _word_file:
	    dictionary = {line.strip() for line in _word_file}

	def num_words(fill):
	    '''
	    Segment the text into words and score the segmentation.

	    :param fill: text to segment (handed to wordsegment's ``segment``)
	    :return: tuple ``(word_count, log_prob)`` where ``log_prob`` is the sum of the natural
	             logs of each word's ``wordsegment.UNIGRAMS`` value. If any segmented word is
	             not in ``dictionary`` or has no unigram entry, the sentinel
	             ``(999, -9999999999999)`` is returned instead.
	    '''
	    segmented = segment(fill)
	    prob = 0.0
	    for word in segmented:
	        if word not in dictionary:
	            return 999, -9999999999999
	        count = wordsegment.UNIGRAMS.get(word)
	        if count is None:
	            # A dictionary word without a unigram count used to raise KeyError here;
	            # with no frequency evidence it is treated like any other unusable word.
	            return 999, -9999999999999
	        prob += math.log(count)
	    return (len(segmented), prob)

	def get_word_flips(fill, num_candidates=10):
	    '''
	    Propose corrections for a word/phrase that is probably misspelled, e.g. iluveyou.

	    Each position of the cleaned text is replaced, in turn, by every letter a-z
	    (e.g. iluveyou -> iloveyou). Every variant is scored with ``num_words`` and the
	    variants are ranked by fewest segmented words first, then by highest log-probability.

	    :param fill: the text to correct
	    :param num_candidates: maximum number of candidates to return
	    :return: list of upper-cased candidates, best first; a single-element list holding the
	             cleaned, upper-cased input when no variant segments into dictionary words
	    '''
	    # Call wordsegment's cleaner through the module: the bare name ``clean`` is rebound
	    # further down this file to a whitespace-only helper, which would otherwise be used here.
	    fill = wordsegment.clean(fill)

	    candidates = []  # (text, word_count, log_prob), in generation order
	    for index in range(len(fill)):
	        head, tail = fill[:index], fill[index + 1:]
	        for new_letter in 'abcdefghijklmnopqrstuvwxyz':
	            new_fill = head + new_letter + tail
	            word_count, prob = num_words(new_fill)
	            candidates.append((new_fill, word_count, prob))

	    # 999 is the word count num_words reports for a variant it rejects.
	    if not any(word_count < 999 for _, word_count, _ in candidates):
	        return [fill.upper()]

	    # One stable sort replaces bucketing by word count and concatenating with sum(..., []):
	    # ties keep generation order, exactly as the per-bucket sorts did.
	    candidates.sort(key=lambda item: (item[1], -item[2]))
	    return [text.upper() for text, _, _ in candidates[0:num_candidates]]

	def convert_puz(fname):
	    '''
	    Convert a puzzle in .puz format into a JSON-style dictionary.

	    Requires the ``puz`` module imported at the top of this file.

	    :param fname: path of the .puz file, passed to ``puz.read``
	    :return: dict with ``metadata`` (date, rows, cols), ``clues`` (across/down maps from
	             clue number to ``[clue + ' ', ' ' + answer]``) and ``grid`` (rows of either
	             ``'BLACK'`` or ``[clue_number_or_empty, solution_letter]``)
	    '''
	    p = puz.read(fname)
	    numbering = p.clue_numbering()
	    width, height = p.width, p.height

	    # Build the grid row by row from the flat solution string; '.' marks a black square.
	    grid = []
	    for row_idx in range(height):
	        offset = row_idx * width
	        grid.append([
	            'BLACK' if letter == '.' else ["", letter]
	            for letter in p.solution[offset:offset + width]
	        ])

	    def collect(clues, stride):
	        # Gather one direction's clues and stamp each clue number on its starting cell.
	        collected = {}
	        for clue in clues:
	            start = clue['cell']
	            answer = ''.join(p.solution[start + k * stride] for k in range(clue['len']))
	            number = str(clue['num'])
	            collected[number] = [clue['clue'] + ' ', ' ' + answer]
	            row, col = divmod(start, width)
	            grid[row][col][0] = number
	        return collected

	    # Across answers advance one cell at a time; down answers advance one row at a time.
	    across_clues = collect(numbering.across, 1)
	    down_clues = collect(numbering.down, numbering.width)

	    return {
	        'metadata': {'date': None, 'rows': height, 'cols': width},
	        'clues': {'across': across_clues, 'down': down_clues},
	        'grid': grid,
	    }

	def clean(text):
	    '''
	    :param text: question or answer text
	    :return: text with leading/trailing whitespace removed and every internal run of
	             whitespace (including line breaks) collapsed to a single space
	    '''
	    # str.split() with no arguments already ignores leading and trailing whitespace.
	    tokens = text.split()
	    return " ".join(tokens)

	def print_grid(letter_grid):
	    '''
	    Print the grid one row per line, showing empty-string cells as a single space.

	    :param letter_grid: iterable of rows, each an iterable of string cells
	    '''
	    for cells in letter_grid:
	        line = "".join(cell if cell != "" else " " for cell in cells)
	        print(line, flush=True)