import os
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
import pandas as pd
import difflib
import json
import pickle as pkl
import csv
import numpy as np


# ----------------------------------------------------------------------------------------------------------------- #
class DogBreed(object):
    """Record for one dog breed as it appears in up to three data sources.

    A breed is identified by its abbreviation and may additionally carry the
    name, path and index information under which it is listed in the 'akc',
    'stanext' and 'xlsx' sources.

    Attributes created in ``__init__``: ``_abbrev``, ``_name_xlsx``,
    ``_name_akc``, ``_name_stanext``, ``_path_stanext`` and
    ``_additional_names``.  The attributes ``_path_akc``, ``_ind_in_stanext``,
    ``_ind_in_xlsx``, ``_ind_in_xlsx_matrix`` and ``_clade`` only exist after
    the matching ``add_*_info`` method ran, and the ``_text_*`` attributes only
    after ``add_text_info``.
    """

    def __init__(self, abbrev, name_akc=None, name_stanext=None, name_xlsx=None, path_akc=None, path_stanext=None, ind_in_xlsx=None, ind_in_xlsx_matrix=None, ind_in_stanext=None, clade=None):
        """Create a breed and register the info of every source whose name is given.

        For each source whose ``name_*`` argument is not None, the matching
        ``add_*_info`` method is called, which asserts that the remaining
        arguments of that source are not None either.
        """
        # The assignment order is kept stable because it determines the order
        # of ``__dict__`` (used by print_overview and the save/load helpers).
        self._abbrev = abbrev
        self._name_xlsx = name_xlsx
        self._name_akc = name_akc
        self._name_stanext = name_stanext
        self._path_stanext = path_stanext
        self._additional_names = set()
        if self._name_akc is not None:
            self.add_akc_info(name_akc, path_akc)
        if self._name_stanext is not None:
            self.add_stanext_info(name_stanext, path_stanext, ind_in_stanext)
        if self._name_xlsx is not None:
            self.add_xlsx_info(name_xlsx, ind_in_xlsx, ind_in_xlsx_matrix, clade)

    def add_xlsx_info(self, name_xlsx, ind_in_xlsx, ind_in_xlsx_matrix, clade):
        """Store the xlsx-related information; all arguments must be non-None."""
        assert (name_xlsx is not None) and (ind_in_xlsx is not None) and (ind_in_xlsx_matrix is not None) and (clade is not None)
        self._name_xlsx = name_xlsx
        self._ind_in_xlsx = ind_in_xlsx
        self._ind_in_xlsx_matrix = ind_in_xlsx_matrix
        self._clade = clade

    def add_stanext_info(self, name_stanext, path_stanext, ind_in_stanext):
        """Store the stanext-related information; all arguments must be non-None."""
        assert (name_stanext is not None) and (path_stanext is not None) and (ind_in_stanext is not None)
        self._name_stanext = name_stanext
        self._path_stanext = path_stanext
        self._ind_in_stanext = ind_in_stanext

    def add_akc_info(self, name_akc, path_akc):
        """Store the akc-related information; all arguments must be non-None."""
        assert (name_akc is not None) and (path_akc is not None)
        self._name_akc = name_akc
        self._path_akc = path_akc

    def add_additional_names(self, name_list):
        """Add every entry of ``name_list`` to the set of additional names."""
        # Build a new set instead of updating in place, so that a set object
        # that was handed out earlier (e.g. via the save dict) is not modified.
        self._additional_names = self._additional_names.union(set(name_list))

    def add_text_info(self, text_height, text_weight, text_life_exp):
        """Store the textual height, weight and life-expectancy information."""
        self._text_height = text_height
        self._text_weight = text_weight
        self._text_life_exp = text_life_exp

    def get_datasets(self):
        """Return the set of sources ('akc', 'stanext', 'xlsx') that list this breed."""
        datasets = set()
        if self._name_akc is not None:
            datasets.add('akc')
        if self._name_stanext is not None:
            datasets.add('stanext')
        if self._name_xlsx is not None:
            datasets.add('xlsx')
        return datasets

    def get_names(self):
        """Return the set of all known names of this breed (None entries removed).

        The set contains the abbreviation, the three source names, the stanext
        path and all additional names.
        """
        names = {self._abbrev, self._name_akc, self._name_stanext, self._name_xlsx, self._path_stanext}.union(self._additional_names)
        names.discard(None)
        return names

    def get_names_as_pointing_dict(self):
        """Return a dict in which every known name points to the abbreviation."""
        return {name: self._abbrev for name in self.get_names()}

    def print_overview(self):
        """Print names, datasets and all instance attributes of this breed."""
        # Preferred display name: akc, then xlsx, then stanext.  A breed that
        # has none of these names is shown by its abbreviation instead of
        # failing on the string concatenation below.
        if self._name_akc is not None:
            name = self._name_akc
        elif self._name_xlsx is not None:
            name = self._name_xlsx
        elif self._name_stanext is not None:
            name = self._name_stanext
        else:
            name = self._abbrev
        print('----------------------------------------------------')
        print('----- dog breed: ' + str(name))
        print('----------------------------------------------------')
        print('[names]')
        print(self.get_names())
        print('[datasets]')
        print(self.get_datasets())
        # see https://stackoverflow.com/questions/9058305/getting-attributes-of-a-class
        print('[instance attributes]')
        for attribute, value in self.__dict__.items():
            print(attribute, '=', value)

    def use_dict_to_save_class_instance(self):
        """Return a new dict mapping every instance attribute name to its value.

        The copy is shallow: the attribute values themselves are shared with
        the instance.
        """
        return dict(self.__dict__)

    def use_dict_to_load_class_instance(self, my_dict):
        """Set every key/value pair of ``my_dict`` as an attribute on this instance."""
        for attribute, value in my_dict.items():
            setattr(self, attribute, value)
        return

# ----------------------------------------------------------------------------------------------------------------- #
def get_name_list_from_summary(summary):
    """Map each breed's abbreviation to a list of all names of that breed.

    ``summary`` is a dict whose values provide ``_abbrev`` and ``get_names()``;
    the keys of ``summary`` are not used.
    """
    return {entry._abbrev: list(entry.get_names()) for entry in summary.values()}
def get_partial_summary(summary, part):
    """Return the entries of ``summary`` whose breed is listed in source ``part``.

    ``part`` must be one of 'xlsx', 'akc' or 'stanext'; a breed counts as
    listed when its name attribute for that source is not None.
    """
    assert part in ['xlsx', 'akc', 'stanext']

    def is_listed(breed):
        # only the name attribute of the requested source is inspected
        if part == 'xlsx':
            return breed._name_xlsx is not None
        if part == 'akc':
            return breed._name_akc is not None
        if part == 'stanext':
            return breed._name_stanext is not None
        return False

    return {key: breed for key, breed in summary.items() if is_listed(breed)}
def get_akc_but_not_stanext_partial_summary(summary):
    """Return the entries of ``summary`` that have an akc name but no stanext name."""
    return {
        key: breed
        for key, breed in summary.items()
        # the stanext name is only looked at for breeds that have an akc name
        if breed._name_akc is not None and breed._name_stanext is None
    }

# ----------------------------------------------------------------------------------------------------------------- #
def main_load_dog_breed_classes(path_complete_abbrev_dict_v1, path_complete_summary_breeds_v1):
    """Load the pickled abbreviation dict and rebuild the DogBreed instances.

    The second pickle holds, per key, a dict of instance attributes (as
    produced by ``DogBreed.use_dict_to_save_class_instance``); each of them is
    turned back into a ``DogBreed`` object.

    Returns a tuple ``(complete_abbrev_dict, complete_summary_breeds)``.
    """
    # NOTE: unpickling executes code contained in the file, so both paths must
    # point to trusted files only.
    with open(path_complete_abbrev_dict_v1, 'rb') as abbrev_file:
        complete_abbrev_dict = pkl.load(abbrev_file)
    with open(path_complete_summary_breeds_v1, 'rb') as summary_file:
        saved_attributes_per_breed = pkl.load(summary_file)

    complete_summary_breeds = {}
    for key, attributes_only in saved_attributes_per_breed.items():
        # create the instance from its abbreviation, then restore all attributes
        breed = DogBreed(abbrev=attributes_only['_abbrev'])
        breed.use_dict_to_load_class_instance(attributes_only)
        complete_summary_breeds[key] = breed
    return complete_abbrev_dict, complete_summary_breeds


# ----------------------------------------------------------------------------------------------------------------- #
def load_similarity_matrix_raw(xlsx_path, n_breeds=168):
    """Read the raw breed similarity matrix from the Excel file of the dog breed paper.

    Layout as read by this function (positions refer to the DataFrame returned
    by ``pd.read_excel``): the first data row is skipped, the column at
    position 2 holds the breed abbreviations and the columns from position 3
    onwards hold the matrix values.

    Args:
        xlsx_path: path of the Excel file.
        n_breeds: number of breeds, i.e. the matrix is ``n_breeds x n_breeds``.
            Defaults to 168, the size that was previously hard-coded.

    Returns:
        A tuple ``(matrix_raw, abbrev_indices)``. ``matrix_raw[i, j]`` is the
        value found in data row ``1 + j`` and column ``3 + i`` of the sheet,
        as float. ``abbrev_indices`` maps each abbreviation to its 0-based
        index.

    Raises:
        ValueError: if the sheet has fewer rows or columns than required for
            ``n_breeds`` breeds.
    """
    # --- LOAD EXCEL FILE FROM DOG BREED PAPER
    xlsx = pd.read_excel(xlsx_path)
    first_row, abbrev_col, first_value_col = 1, 2, 3
    last_row = first_row + n_breeds
    last_value_col = first_value_col + n_breeds
    n_rows, n_cols = xlsx.shape
    # positional slicing silently truncates, so check the size explicitly
    if n_rows < last_row or n_cols < last_value_col:
        raise ValueError(
            'sheet of shape {} is too small for {} breeds (need at least {} rows and {} columns)'.format(
                xlsx.shape, n_breeds, last_row, last_value_col))
    abbrevs = xlsx.iloc[first_row:last_row, abbrev_col].tolist()
    abbrev_indices = {abbrev: ind for ind, abbrev in enumerate(abbrevs)}
    values = xlsx.iloc[first_row:last_row, first_value_col:last_value_col].to_numpy(dtype=float)
    # the first matrix axis runs over the sheet columns, hence the transpose
    matrix_raw = np.ascontiguousarray(values.T)
    return matrix_raw, abbrev_indices


# ----------------------------------------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------------------------------------- #
# Load the (in advance created) final dict of dog breed classes.
# Everything below runs when this module is imported.
# The breed data is expected in '<directory of this file>/../../../data/breed_data'.
ROOT_PATH_BREED_DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', 'data', 'breed_data')
# NOTE: the variable names end in '_v1', but the files that are loaded are the '_v2' pickles.
path_complete_abbrev_dict_v1 = os.path.join(ROOT_PATH_BREED_DATA, 'complete_abbrev_dict_v2.pkl')
path_complete_summary_breeds_v1 = os.path.join(ROOT_PATH_BREED_DATA, 'complete_summary_breeds_v2.pkl')
COMPLETE_ABBREV_DICT, COMPLETE_SUMMARY_BREEDS = main_load_dog_breed_classes(path_complete_abbrev_dict_v1, path_complete_summary_breeds_v1)
# Load the similarity matrix, data from:
#   Parker H. G., Dreger D. L., Rimbault M., Davis B. W., Mullen A. B., Carpintero-Ramirez G., and Ostrander E. A.
#   Genomic analyses reveal the influence of geographic origin, migration, and hybridization on modern dog breed
#   development. Cell Reports, 19(4):697–708, 2017.
xlsx_path = os.path.join(ROOT_PATH_BREED_DATA, 'NIHMS866262-supplement-2.xlsx')
# SIM_MATRIX_RAW is the raw matrix, SIM_ABBREV_INDICES maps each breed abbreviation to its matrix index.
SIM_MATRIX_RAW, SIM_ABBREV_INDICES = load_similarity_matrix_raw(xlsx_path)