import pickle
import spacy
import time

from text_analytics.constants import ACCEPTED_LANGUAGES
from text_analytics.constants import BASE_DIRECTORY
from text_analytics.indices.connective_indices import ConnectiveIndices
from text_analytics.indices.descriptive_indices import DescriptiveIndices
from text_analytics.indices.lexical_diversity_indices import LexicalDiversityIndices
from text_analytics.indices.readability_indices import ReadabilityIndices
from text_analytics.indices.syntactic_complexity_indices import SyntacticComplexityIndices
from text_analytics.indices.syntactic_pattern_density_indices import SyntacticPatternDensityIndices
from text_analytics.indices.word_information_indices import WordInformationIndices
from text_analytics.pipes.negative_expression_tagger import NegativeExpressionTagger
from text_analytics.pipes.noun_phrase_tagger import NounPhraseTagger
from text_analytics.pipes.syllable_splitter import SyllableSplitter
from text_analytics.pipes.verb_phrase_tagger import VerbPhraseTagger
from text_analytics.pipes.causal_connectives_tagger import CausalConnectivesTagger
from text_analytics.pipes.logical_connectives_tagger import LogicalConnectivesTagger
from text_analytics.pipes.adversative_connectives_tagger import AdversativeConnectivesTagger
from text_analytics.pipes.temporal_connectives_tagger import TemporalConnectivesTagger
from text_analytics.pipes.additive_connectives_tagger import AdditiveConnectivesTagger
from text_analytics.pipes.emphatics_tagger import EmphaticsTagger
from text_analytics.pipes.asks_tagger import AsksTagger
from text_analytics.pipes.polites_tagger import PolitesTagger
from text_analytics.pipes.feature_counter import FeatureCounter
from typing import Dict
from typing import List


class TextComplexityAnalyzer:
    '''
    This class groups all of the indices in order to calculate them in one go. It works for a specific language.

    To use this class, instantiate an object with it. For example:
    tca = TextComplexityAnalyzer('en')

    Notice that a short version of the language was passed. The only languages available for now are: 'en'.

    To calculate the implemented coh-metrix indices for a text, do the following:
    m1, m2, m3, m4, m5, m6, m7, m8 = tca.calculate_all_indices_for_one_text(text='Example text', workers=-1)

    Here, all available cores will be used to analyze the text passed as parameter.

    To predict the category of a text, do the following:
    prediction = tca.predict_text_category(text='Example text', workers=-1)

    The example uses the default classifier stored along the library.
    '''
    def __init__(self, language:str = 'en') -> None:
        '''
        This constructor initializes the analizer for a specific language.

        Parameters:
        language(str): The language that the texts are in.
        
        Returns:
        None.
        '''
        if not language in ACCEPTED_LANGUAGES:
            raise ValueError(f'Language {language} is not supported yet')
        
        self.language = language
        self._nlp = spacy.load(ACCEPTED_LANGUAGES[language], disable=['ner'])
        self._nlp.max_length = 3000000
        self._nlp.add_pipe('sentencizer')
        self._nlp.add_pipe('syllables', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('causal connective tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('temporal connective tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('emphatics tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('asks tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('polites tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('logical connective tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('adversative connective tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('additive connective tagger', config={
                           "language": 'en'}, after='tagger')
        self._nlp.add_pipe('feature counter', config={
                           "language": 'en'}, last=True)
        self._di = DescriptiveIndices(language=language, nlp=self._nlp)
        self._spdi = SyntacticPatternDensityIndices(language=language, nlp=self._nlp, descriptive_indices=self._di)
        self._wii = WordInformationIndices(language=language, nlp=self._nlp, descriptive_indices=self._di)
        self._sci = SyntacticComplexityIndices(language=language, nlp=self._nlp)
        self._ci = ConnectiveIndices(language=language, nlp=self._nlp, descriptive_indices=self._di)
        self._ldi = LexicalDiversityIndices(language=language, nlp=self._nlp)
        self._ri = ReadabilityIndices(language=language, nlp=self._nlp, descriptive_indices=self._di)

        # Load default classifier
        # self._classifier = pickle.load(open(f'{BASE_DIRECTORY}/model/classifier.pkl', 'rb'))
        # self._scaler = pickle.load(open(f'{BASE_DIRECTORY}/model/scaler.pkl', 'rb'))
        # self._indices = ['CNCADC', 'CNCAdd', 'CNCAll', 'CNCCaus', 'CNCLogic', 'CNCTemp', 'CRFANP1', 'CRFANPa', 'CRFAO1', 'CRFAOa', 'CRFCWO1', 'CRFCWO1d', 'CRFCWOa', 'CRFCWOad', 'CRFNO1', 'CRFNOa', 'CRFSO1', 'CRFSOa', 'DESPC', 'DESPL', 'DESPLd', 'DESSC', 'DESSL', 'DESSLd', 'DESWC', 'DESWLlt', 'DESWLltd', 'DESWLsy', 'DESWLsyd', 'DRNEG', 'DRNP', 'DRVP', 'LDTTRa', 'LDTTRcw', 'RDFHGL', 'SYNLE', 'SYNNP', 'WRDADJ', 'WRDADV', 'WRDNOUN', 'WRDPRO', 'WRDPRP1p', 'WRDPRP1s', 'WRDPRP2p', 'WRDPRP2s', 'WRDPRP3p', 'WRDPRP3s', 'WRDVERB']


    def calculate_descriptive_indices_for_one_text(self, text: str, workers: int=-1) -> Dict:
        '''
        This method calculates the descriptive indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.

        Returns:
        Dict: The dictionary with the descriptive indices.
        '''
        indices = {}
        indices['DESPC'] = self._di.get_paragraph_count_from_text(text=text)
        indices['DESSC'] = self._di.get_sentence_count_from_text(text=text, workers=workers)
        indices['DESWC'] = self._di.get_word_count_from_text(text=text, workers=workers)
        length_of_paragraph = self._di.get_length_of_paragraphs(text=text, workers=workers)
        indices['DESPL'] = length_of_paragraph.mean
        indices['DESPLd'] = length_of_paragraph.std
        length_of_sentences = self._di.get_length_of_sentences(text=text, workers=workers)
        indices['DESSL'] = length_of_sentences.mean
        indices['DESSLd'] = length_of_sentences.std
        syllables_per_word = self._di.get_syllables_per_word(text=text, workers=workers)
        indices['DESWLsy'] = syllables_per_word.mean
        indices['DESWLsyd'] = syllables_per_word.std
        length_of_words = self._di.get_length_of_words(text=text, workers=workers)
        indices['DESWLlt'] = length_of_words.mean
        indices['DESWLltd'] = length_of_words.std
        return indices

    def calculate_word_information_indices_for_one_text(self, text: str, workers: int=-1, word_count: int=None) -> Dict:
        '''
        This method calculates the descriptive indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.
        word_count(int): The amount of words that the current text has in order to calculate the incidence.

        Returns:
        Dict: The dictionary with the word information indices.
        '''
        indices = {}
        indices['WRDNOUN'] = self._wii.get_noun_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDVERB'] = self._wii.get_verb_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDADJ'] = self._wii.get_adjective_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDADV'] = self._wii.get_adverb_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRO'] = self._wii.get_personal_pronoun_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP1s'] = self._wii.get_personal_pronoun_first_person_singular_form_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP1p'] = self._wii.get_personal_pronoun_first_person_plural_form_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP2s'] = self._wii.get_personal_pronoun_second_person_singular_form_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP2p'] = self._wii.get_personal_pronoun_second_person_plural_form_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP3s'] = self._wii.get_personal_pronoun_third_person_singular_form_incidence(text=text, workers=workers, word_count=word_count)
        indices['WRDPRP3p'] = self._wii.get_personal_pronoun_third_person_plural_form_incidence(text=text, workers=workers, word_count=word_count)
        
        return indices

    def calculate_syntactic_pattern_density_indices_for_one_text(self, text: str, workers: int=-1, word_count: int=None) -> Dict:
        '''
        This method calculates the syntactic pattern indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        word_count(int): The amount of words that the current text has in order to calculate the incidence.

        Returns:
        Dict: The dictionary with the syntactic pattern indices.
        '''
        indices = {}
        indices['DRNP'] = self._spdi.get_noun_phrase_density(text=text, workers=workers, word_count=word_count)
        indices['DRVP'] = self._spdi.get_verb_phrase_density(text=text, workers=workers, word_count=word_count)
        indices['DRNEG'] = self._spdi.get_negation_expressions_density(text=text, workers=workers, word_count=word_count)

        return indices
        
    def calculate_syntactic_complexity_indices_for_one_text(self, text: str, workers: int=-1) -> Dict:
        '''
        This method calculates the syntactic complexity indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.

        Returns:
        Dict: The dictionary with the syntactic complexity indices.
        '''
        indices = {}
        indices['SYNNP'] = self._sci.get_mean_number_of_modifiers_per_noun_phrase(text=text, workers=workers)
        indices['SYNLE'] = self._sci.get_mean_number_of_words_before_main_verb(text=text, workers=workers)

        return indices

    def calculate_connective_indices_for_one_text(self, text: str, workers: int=-1, word_count: int=None) -> Dict:
        '''
        This method calculates the connectives indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.
        word_count(int): The amount of words that the current text has in order to calculate the incidence.

        Returns:
        Dict: The dictionary with the connectives indices.
        '''
        indices = {}
        indices['CNCAll'] = self._ci.get_all_connectives_incidence(text=text, workers=workers, word_count=word_count)
        indices['CNCCaus'] = self._ci.get_causal_connectives_incidence(text=text, workers=workers, word_count=word_count)
        indices['CNCLogic'] = self._ci.get_logical_connectives_incidence(text=text, workers=workers, word_count=word_count)
        indices['CNCADC'] = self._ci.get_adversative_connectives_incidence(text=text, workers=workers, word_count=word_count)
        indices['CNCTemp'] = self._ci.get_temporal_connectives_incidence(text=text, workers=workers, word_count=word_count)
        indices['CNCAdd'] = self._ci.get_additive_connectives_incidence(text=text, workers=workers, word_count=word_count)

        return indices

    def calculate_lexical_diversity_indices_for_one_text(self, text: str, workers: int=-1) -> Dict:
        '''
        This method calculates the lexical diversity indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.
        word_count(int): The amount of words that the current text has in order to calculate the incidence.

        Returns:
        Dict: The dictionary with the lexical diversity indices.
        '''
        indices = {}
        indices['LDTTRa'] = self._ldi.get_type_token_ratio_between_all_words(text=text, workers=workers)
        indices['LDTTRcw'] = self._ldi.get_type_token_ratio_of_content_words(text=text, workers=workers)

        return indices

    def calculate_readability_indices_for_one_text(self, text: str, workers: int=-1, mean_syllables_per_word: int=None, mean_words_per_sentence: int=None) -> Dict:
        '''
        This method calculates the readability indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.
        mean_syllables_per_word(int): The mean of syllables per word in the text.
        mean_words_per_sentence(int): The mean amount of words per sentences in the text.

        Returns:
        Dict: The dictionary with the readability indices.
        '''
        indices = {}
        
        if self.language == 'en':
            indices['RDFHGL'] = self._ri.calculate_fernandez_huertas_grade_level(text=text, workers=workers, mean_words_per_sentence=mean_words_per_sentence, mean_syllables_per_word=mean_syllables_per_word)

        return indices

    def calculate_all_indices_for_one_text(self, text: str, workers: int=-1) -> (Dict, Dict, Dict, Dict, Dict, Dict, Dict):
        '''
        This method calculates all indices and stores them in a dictionary.

        Parameters:
        text(str): The text to be analyzed.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.

        Returns:
        (Dict, Dict, Dict, Dict, Dict, Dict, Dict, Dict): The dictionary with the all the indices.
        '''
        if workers == 0 or workers < -1:
            raise ValueError('Workers must be -1 or any positive number greater than 0.')
        else:
            start = time.time()
            descriptive = self.calculate_descriptive_indices_for_one_text(text=text, workers=workers)
            word_count = descriptive['DESWC']
            mean_words_per_sentence = descriptive['DESSL']
            mean_syllables_per_word = descriptive['DESWLsy']
            word_information = self.calculate_word_information_indices_for_one_text(text=text, workers=workers, word_count=word_count)
            syntactic_pattern = self.calculate_syntactic_pattern_density_indices_for_one_text(text=text, workers=workers, word_count=word_count)
            syntactic_complexity = self.calculate_syntactic_complexity_indices_for_one_text(text=text, workers=workers)
            connective = self.calculate_connective_indices_for_one_text(text=text, workers=workers, word_count=word_count)
            lexical_diversity = self.calculate_lexical_diversity_indices_for_one_text(text=text, workers=workers)
            readability = self.calculate_readability_indices_for_one_text(text, workers=workers, mean_words_per_sentence=mean_words_per_sentence, mean_syllables_per_word=mean_syllables_per_word)
            end = time.time()
            print(f'Text analyzed in {end - start} seconds.')

            return descriptive, word_information, syntactic_pattern, syntactic_complexity, connective, lexical_diversity, readability

    def predict_text_category(self, text: str, workers: int=-1, classifier=None, scaler=None, indices: List=None) -> int:
        '''
        This method receives a text and predict its category based on the classification model trained.

        Parameters:
        text(str): The text to predict its category.
        workers(int): Amount of threads that will complete this operation. If it's -1 then all cpu cores will be used.
        classifier: Optional. A supervised learning model that implements the 'predict' method. If None, the default classifier is used.
        scaler: Optional. A object that implements the 'transform' method that scales the indices of the text to analyze. It must be the same as the one used in the classifier, if a scaler was used. Pass None if no scaler was used during the custom classifier's training.
        indices(List): Optional. Ignored if the default classifier is used. The name indices which the classifier was trained with. They must be in the same order as the ones that were used at training and also be the same. 

        Returns:
        int: The category of the text represented as a number
        '''
        if workers == 0 or workers < -1:
            raise ValueError('Workers must be -1 or any positive number greater than 0.')
        if classifier is not None and not hasattr(classifier, 'predict'):
            raise ValueError('The custom surpervised learning model (classifier) must have the \'predict\' method.d')
        if classifier is not None and indices is None:
            raise ValueError('You must provide the names of the metrics used to train the custom classifier in the same order and amount that they were at the time of training said classifier.')
        if classifier is not None and scaler is not None and not hasattr(scaler, 'transform'):
            raise ValueError('The custom scaling model (scaler) for the custom classifier must have the \'transform\' method.')
        else:
            descriptive, word_information, syntactic_pattern, syntactic_complexity, connective, lexical_diversity, readability = self.calculate_all_indices_for_one_text(text, workers)
            metrics = {**descriptive, **word_information, **syntactic_pattern, **syntactic_complexity, **connective, **lexical_diversity, **readability}
            print('metrics', metrics)
            if classifier is None: # Default indices
                print(TextComplexityAnalyzer)
                indices_values = [[metrics[key] for key in self.indices]]
                

                return self._classifier.predict(self._scaler.transform(indices_values))
            else: # Indices used by the custom classifier
                indices_values = [[metrics[key] for key in indices]]
                    
                return list(classifier.predict(indices_values if scaler is None else scaler.transform(indices_values)))