import json
import puz
import wordsegment
import math
from wordsegment import load, segment, clean
import os
# Load wordsegment's corpus data before segment() / UNIGRAMS are used below.
load()

# Set of accepted words, one per line in words_alpha.txt (path is relative to
# the current working directory). The context manager closes the file handle
# as soon as the set has been built.
with open('./words_alpha.txt', 'r') as _words_file:
    dictionary = {line.strip() for line in _words_file}

def num_words(fill):
    '''
    Segment the text into words and score the segmentation.

    :param fill: text to split into words with wordsegment's ``segment``
    :return: tuple ``(word_count, score)`` where ``score`` is the sum of the
        natural logs of the unigram counts of the segmented words. If any
        segmented word is not in ``dictionary`` the sentinel
        ``(999, -9999999999999)`` is returned instead.
    '''
    words = segment(fill)
    score = 0.0
    for word in words:
        if word not in dictionary:
            return 999, -9999999999999
        # A word may be in ``dictionary`` yet missing from the unigram table;
        # indexing would raise KeyError. Such a word adds nothing to the score
        # (equivalent to a count of 1, since log(1) == 0).
        count = wordsegment.UNIGRAMS.get(word)
        if count is not None:
            score += math.log(count)
    return (len(words), score)

def get_word_flips(fill, num_candidates=10):
    '''
    Propose corrections for a probably-misspelled fill such as "iluveyou".

    Every position of the fill is replaced in turn by each letter a-z (the
    original letter included), and each variant is scored with num_words.
    Variants are ordered by ascending word count and, within the same word
    count, by descending score.

    :param fill: the word/phrase to correct
    :param num_candidates: maximum number of variants to return
    :return: list of upper-cased variants, best first. If no variant gets a
        word count below 999, the list holds only the upper-cased fill.
    '''
    # NOTE: ``clean`` resolves to the whitespace-normalising function defined
    # further down in this module, which rebinds the name imported from
    # wordsegment at the top of the file.
    fill = clean(fill)

    by_word_count = {}
    for position in range(len(fill)):
        head, tail = fill[:position], fill[position + 1:]
        for letter in 'abcdefghijklmnopqrstuvwxyz':
            variant = head + letter + tail
            word_count, score = num_words(variant)
            by_word_count.setdefault(word_count, []).append((variant, score))

    # 999 is the "invalid" word count; nothing below it means nothing usable.
    if min(by_word_count, default=999) >= 999:
        return [fill.upper()]

    ranked = [
        entry
        for word_count in sorted(by_word_count)
        for entry in sorted(by_word_count[word_count], key=lambda pair: pair[1], reverse=True)
    ]
    return [variant.upper() for variant, _ in ranked[:num_candidates]]

def convert_puz(fname):
    '''
    Read a puzzle in .puz format and return it as a JSON-style dict.

    Requires the ``puz`` module imported at the top of this file.

    :param fname: path of the .puz file, passed to ``puz.read``
    :return: dict with keys 'metadata' (date, rows, cols), 'clues'
        ('across' and 'down', each mapping clue number -> [clue text + ' ',
        ' ' + answer]) and 'grid' (rows of cells, each either 'BLACK' or
        [clue number or "", solution letter]).
    '''
    puzzle = puz.read(fname)
    numbering = puzzle.clue_numbering()
    n_rows, n_cols = puzzle.height, puzzle.width

    # Build the grid from the flat, row-major solution string.
    grid = [[None] * n_cols for _ in range(n_rows)]
    for r in range(n_rows):
        offset = r * n_cols
        for c, letter in enumerate(puzzle.solution[offset:offset + n_cols]):
            grid[r][c] = 'BLACK' if letter == '.' else ["", letter]

    def collect(entries, stride):
        # Gather clue/answer pairs for one direction and stamp each clue's
        # number onto its starting cell in the grid. ``stride`` is the
        # distance in the flat solution between consecutive answer letters.
        clue_map = {}
        for entry in entries:
            start = entry['cell']
            answer = ''.join(puzzle.solution[start + step * stride] for step in range(entry['len']))
            number = str(entry['num'])
            clue_map[number] = [entry['clue'] + ' ', ' ' + answer]
            r, c = divmod(start, n_cols)
            grid[r][c][0] = number
        return clue_map

    across_clues = collect(numbering.across, 1)
    down_clues = collect(numbering.down, numbering.width)

    return {
        'metadata': {'date': None, 'rows': n_rows, 'cols': n_cols},
        'clues': {'across': across_clues, 'down': down_clues},
        'grid': grid,
    }

def clean(text):
    '''
    :param text: question or answer text
    :return: text with leading/trailing whitespace dropped and every run of
        whitespace (spaces, tabs, line breaks) collapsed to a single space
    '''
    # split() with no arguments already ignores leading/trailing whitespace.
    tokens = text.split()
    return " ".join(tokens)

def print_grid(letter_grid):
    '''
    Print the grid one row per line, showing empty-string cells as a space.

    :param letter_grid: iterable of rows, each an iterable of strings
    '''
    for cells in letter_grid:
        line = "".join(" " if cell == "" else cell for cell in cells)
        print(line, flush=True)