import json
import math
import os

import puz
import wordsegment
from wordsegment import load, segment, clean

# wordsegment needs its frequency tables loaded before segment() or
# wordsegment.UNIGRAMS can be used, so this runs once at import time.
load()

# Accepted words (one per line in the word list), held in a set for fast
# membership tests. The context manager closes the file handle promptly.
with open('./words_alpha.txt', 'r') as _word_file:
    dictionary = {line.strip() for line in _word_file}

# Sentinel pair returned by num_words() when a fill cannot be segmented into
# accepted words; get_word_flips() relies on the 999 word count.
_INVALID_WORD_COUNT = 999
_INVALID_SCORE = -9999999999999


def num_words(fill):
    """Segment ``fill`` into words and score the segmentation.

    :param fill: text without word boundaries, e.g. ``"iloveyou"``
    :return: tuple ``(word_count, score)`` where ``score`` is the sum of
        ``math.log`` of each word's ``wordsegment.UNIGRAMS`` count. If any
        segmented word is not in ``dictionary``, or has no unigram count,
        the sentinel ``(999, -9999999999999)`` is returned instead.
    """
    segmented = segment(fill)
    score = 0.0
    for word in segmented:
        # A dictionary word without a frequency count cannot be scored;
        # reject the candidate instead of raising KeyError.
        count = wordsegment.UNIGRAMS.get(word)
        if word not in dictionary or count is None:
            return _INVALID_WORD_COUNT, _INVALID_SCORE
        score += math.log(count)
    return len(segmented), score


def get_word_flips(fill, num_candidates=10):
    """Propose corrections for a probably-misspelled fill.

    Every position of the fill is replaced in turn by each letter a-z
    (e.g. ``iluveyou`` -> ``iloveyou``). Each variant is segmented with
    ``num_words``. Variants are ranked by fewest words first, then by highest
    score within the same word count.

    :param fill: the word/phrase to correct
    :param num_candidates: maximum number of candidates to return
    :return: list of upper-cased candidate strings, best first. If no variant
        segments into accepted words, a one-element list holding the
        normalized fill (upper-cased) is returned.
    """
    # Call wordsegment.clean explicitly: the bare name ``clean`` is rebound
    # later in this module to a whitespace-collapsing helper, which would
    # leave spaces/punctuation in the fill to be "flipped" like letters.
    fill = wordsegment.clean(fill)

    # word count -> list of (variant, score), in generation order
    results = {}
    for index in range(len(fill)):
        prefix, suffix = fill[:index], fill[index + 1:]
        for new_letter in 'abcdefghijklmnopqrstuvwxyz':
            new_fill = prefix + new_letter + suffix
            curr_num_words, score = num_words(new_fill)
            results.setdefault(curr_num_words, []).append((new_fill, score))

    # No variant beat the failure sentinel (also covers an empty fill).
    if min(results, default=_INVALID_WORD_COUNT) >= _INVALID_WORD_COUNT:
        return [fill.upper()]

    # Flatten buckets in ascending word count; the stable sort on negated
    # score keeps generation order among equally scored variants.
    ranked = [
        candidate
        for length in sorted(results)
        for candidate in sorted(results[length], key=lambda item: -item[1])
    ]
    return [candidate[0].upper() for candidate in ranked[:num_candidates]]


def _collect_clues(clues, solution, step, width, grid):
    """Build the clue dict for one direction and write clue numbers into grid.

    ``step`` is the distance in ``solution`` between consecutive answer
    letters. Each clue's number is stored in slot 0 of its starting cell.
    """
    collected = {}
    for clue in clues:
        start = clue['cell']
        answer = ''.join(solution[start + i * step] for i in range(clue['len']))
        number = str(clue['num'])
        collected[number] = [clue['clue'] + ' ', ' ' + answer]
        # Integer arithmetic for the index; avoids float division.
        row, col = divmod(start, width)
        grid[row][col][0] = number
    return collected


def convert_puz(fname):
    """Convert a puzzle in .puz format into a JSON-serializable dict.

    Requires the third-party ``puz`` module imported at the top of this file.

    :param fname: path of the .puz file to read
    :return: dict with keys ``'metadata'`` (``date`` is always ``None``, plus
        ``rows`` and ``cols``), ``'clues'`` (``across`` and ``down`` dicts
        mapping clue number strings to ``[clue + ' ', ' ' + answer]``) and
        ``'grid'`` (rows of cells; a cell is the string ``'BLACK'`` or a list
        ``[clue_number_or_empty_string, solution_letter]``).
    """
    p = puz.read(fname)
    numbering = p.clue_numbering()
    width, height = p.width, p.height

    grid = [[None for _ in range(width)] for _ in range(height)]
    for row_idx in range(height):
        row_start = row_idx * width
        row_solution = p.solution[row_start:row_start + width]
        for col_index, letter in enumerate(row_solution):
            # '.' marks a black square in the solution string.
            grid[row_idx][col_index] = 'BLACK' if letter == '.' else ["", letter]

    # Across answers advance one cell at a time; down answers advance by one
    # full row.
    across_clues = _collect_clues(numbering.across, p.solution, 1, width, grid)
    down_clues = _collect_clues(
        numbering.down, p.solution, numbering.width, width, grid)

    mydict = {
        'metadata': {'date': None, 'rows': height, 'cols': width},
        'clues': {'across': across_clues, 'down': down_clues},
        'grid': grid,
    }
    return mydict


def clean(text):
    """Normalize whitespace in question or answer text.

    NOTE: this definition rebinds the module-level name ``clean`` and so
    shadows the ``clean`` imported from ``wordsegment`` above.

    :param text: question or answer text
    :return: text with leading/trailing whitespace removed and every run of
        whitespace (including line breaks) collapsed to a single space
    """
    return " ".join(text.strip().split())


def print_grid(letter_grid):
    """Print each row of ``letter_grid`` on its own line.

    Empty-string cells are shown as a single space. Output is flushed after
    every row.
    """
    for row in letter_grid:
        print("".join(" " if val == "" else val for val in row), flush=True)