import re
from collections import defaultdict
import string

from scipy.special import softmax
import numpy as np

from Models_inf import answer_clues, setup_closedbook

class Solver:
    """
    This class represents an abstraction over different types of crossword solvers. Each puzzle contains
    a list of clues, which are associated with (weighted) values for each candidate answer.

    Args:
        crossword (Crossword): puzzle to solve
        max_candidates (int): number of answer candidates to consider per clue
    """
    def __init__(self, crossword, model_path, ans_tsv_path, dense_embd_path, max_candidates = 100, process_id = 0, model_type = 'bert'):
        self.crossword = crossword
        self.max_candidates = max_candidates
        self.process_id = process_id
        self.model_path = model_path
        self.ans_tsv_path = ans_tsv_path
        self.dense_embd_glob = dense_embd_path, 
        self.model_type = model_type
        self.get_candidates()
    
    def get_candidates(self):
        # get answers from neural model and fill up data structures with the results
        chars = string.ascii_uppercase
        self.char_map = {char: idx for idx, char in enumerate(chars)}
        self.candidates = {}
        
        all_clues = []
        for var in self.crossword.variables:
            all_clues.append(self.crossword.variables[var]['clue'])
        
        # replaces stuff like "Opposite of 29-across" with "Opposite of X", where X is the clue for 29-across
        r = re.compile('([0-9]+)[-\s](down|across)', re.IGNORECASE)
        matches = [(idx, r.search(clue)) for idx, clue in enumerate(all_clues) if r.search(clue) != None]
        for (idx, match) in matches:
            clue = all_clues[idx]
            var = str(match.group(1)) + str(match.group(2)[0]).upper()
            if var in self.crossword.variables:
                clue = clue[:match.start()] + self.crossword.variables[var]['clue'] + clue[match.end():]
                all_clues[idx] = clue

        # get predictions
        dpr = setup_closedbook(self.model_path, self.ans_tsv_path, self.dense_embd_glob, self.process_id, self.model_type)
        all_words, all_scores = answer_clues(dpr, all_clues, max_answers = self.max_candidates, output_strings=True) 
        
        for index, var in enumerate(self.crossword.variables):
            length = len(self.crossword.variables[var]["gold"])
            self.candidates[var] = {"words": [], "bit_array": None, "weights": {}}

            clue = all_clues[index]
            words, scores = all_words[index], all_scores[index]
            # remove answers that are not of the correct length
            keep_positions = []
            for word_index, word in enumerate(words):
                if len(word) == length:
                    keep_positions.append(word_index)
            words = [words[i] for i in keep_positions]
            scores = [scores[i] for i in keep_positions]

            scores = list(-np.log(softmax(np.array(scores) / 0.75)))

            for word, score in zip(words, scores):
                self.candidates[var]["weights"][word] = score

            
            weights = self.candidates[var]["weights"]
            self.candidates[var]["words"] = sorted(weights, key=weights.get)
            self.candidates[var]["bit_array"] = np.zeros((len(chars), length, len(self.candidates[var]["words"])))
            self.candidates[var]["single_query_cache"] = [defaultdict(lambda:[]) for _ in range(len(chars))]
            self.candidates[var]["single_query_cache_indices"] = [defaultdict(lambda:[]) for _ in range(len(chars))]
            for word_idx, word in enumerate(self.candidates[var]["words"]):
                for pos_idx, char in enumerate(word):
                    char_idx = self.char_map[char]
                    self.candidates[var]["bit_array"][char_idx, pos_idx, word_idx] = 1
                    self.candidates[var]["single_query_cache"][pos_idx][char].append(word)
                    self.candidates[var]["single_query_cache_indices"][pos_idx][char].append(word_idx)
                    # NOTE: TODO, it's possible to cache more here in exchange for doing more work at init time

        # cleanup a bit
        del dpr

    def evaluate(self, solution, print_log = True):
        letters_correct = 0
        letters_total = 0
        for i in range(len(self.crossword.letter_grid)):
            for j in range(len(self.crossword.letter_grid[0])):
                if self.crossword.letter_grid[i][j] != "":
                    letters_correct += (self.crossword.letter_grid[i][j] == solution[i][j])
                    letters_total += 1
        words_correct = 0
        words_total = 0
        for var in self.crossword.variables:
            cells = self.crossword.variables[var]["cells"]
            matching_cells = [self.crossword.letter_grid[cell[0]][cell[1]] == solution[cell[0]][cell[1]] for cell in cells]
            if len(cells) == sum(matching_cells):
                words_correct += 1
            words_total += 1

        letter_frac_log = "Letters Correct: {}/{} | Words Correct: {}/{}".format(int(letters_correct), int(letters_total), int(words_correct), int(words_total))
        letter_acc_log = "Letters Correct: {}% | Words Correct: {}%".format(float(letters_correct/letters_total*100), float(words_correct/words_total*100))

        if print_log:
            print(letter_frac_log)
            print(letter_acc_log)
        
        return letter_frac_log, letter_acc_log
    
    def evaluate1(self, solution):
        # print puzzle accuracy results given a generated solution
        letters_correct = 0
        letters_total = 0
        for i in range(len(self.crossword.letter_grid)):
            for j in range(len(self.crossword.letter_grid[0])):
                if self.crossword.letter_grid[i][j] != "":
                    letters_correct += (self.crossword.letter_grid[i][j] == solution[i][j])
                    letters_total += 1
        words_correct = 0
        words_total = 0
        for var in self.crossword.variables:
            cells = self.crossword.variables[var]["cells"]
            matching_cells = [self.crossword.letter_grid[cell[0]][cell[1]] == solution[cell[0]][cell[1]] for cell in cells]
            if len(cells) == sum(matching_cells):
                words_correct += 1
            else:
                # print('evaluation: correct word', ''.join([self.crossword.letter_grid[cell[0]][cell[1]] for cell in cells]), 'our prediction:', ''.join([solution[cell[0]][cell[1]] for cell in cells]))
                pass
            words_total += 1
        
        print("Letters Correct: {}/{} | Words Correct: {}/{}".format(int(letters_correct), int(letters_total), int(words_correct), int(words_total)))
        print("Letters Correct: {}% | Words Correct: {}%".format(float(letters_correct/letters_total*100), float(words_correct/words_total*100)))
        
        info = {
            "total_letters" : int(letters_total),
            "total_words" : int(words_total),
            "correct_letters" : int(letters_correct),
            "correct_words" : int(words_correct),
            "correct_letters_percent" : float(letters_correct/letters_total*100),
            "correct_words_percent" : float(words_correct/words_total*100),
        }
        return info