import abc

from huggingface_hub import login
from transformers.tools import TextSummarizationTool
from transformers import HfAgent
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig

from gistillery.config import get_config

# Lazily-created singleton agent; populated on first get_agent() call.
agent: HfAgent | None = None


def get_agent() -> HfAgent:
    """Return the process-wide HfAgent, logging in to the HF Hub on first use.

    The agent is created lazily and cached in the module-level ``agent``
    variable so the Hub login and agent construction happen at most once.
    """
    global agent
    if agent is None:
        login(get_config().hf_hub_token)
        agent = HfAgent(get_config().hf_agent)
    return agent


class Summarizer(abc.ABC):
    """Interface for text summarizers."""

    @abc.abstractmethod
    def get_name(self) -> str:
        """Return a stable identifier for this summarizer implementation."""
        raise NotImplementedError

    @abc.abstractmethod
    def __call__(self, x: str) -> str:
        """Summarize the input text and return the summary."""
        raise NotImplementedError


class HfDefaultSummarizer(Summarizer):
    """Summarizer backed by the default transformers summarization tool."""

    def __init__(self) -> None:
        self.summarizer = TextSummarizationTool()

    def get_name(self) -> str:
        return "hf_default"

    def __call__(self, x: str) -> str:
        summary = self.summarizer(x)
        # The tool is untyped; guard against non-string returns.
        assert isinstance(summary, str)
        return summary


class Tagger(abc.ABC):
    """Interface for taggers that derive a list of tags from text."""

    @abc.abstractmethod
    def get_name(self) -> str:
        """Return a stable identifier for this tagger implementation."""
        raise NotImplementedError

    @abc.abstractmethod
    def __call__(self, x: str) -> list[str]:
        """Return a list of tags (each prefixed with '#') for the input text."""
        raise NotImplementedError


class HfDefaultTagger(Tagger):
    """Tagger that prompts a seq2seq model for a comma-separated tag list."""

    def __init__(self, model_name: str = "google/flan-t5-large") -> None:
        self.model_name = model_name
        config = GenerationConfig.from_pretrained(self.model_name)
        config.max_new_tokens = 50
        config.min_new_tokens = 25
        # increase the temperature to make the model more creative
        config.temperature = 1.5
        # BUGFIX: temperature only has an effect when sampling is enabled;
        # without do_sample=True, greedy decoding silently ignores it.
        config.do_sample = True
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.generation_config = config
        self.template = (
            "Create a list of tags for the text below. The tags should be high level "
            "and specific. Return the results as a comma separated list.\n\n"
            "{}\n\nTags:\n"
        )

    def _extract_tags(self, text: str) -> list[str]:
        """Parse the model's comma-separated output into sorted, unique tags.

        Each tag is lower-cased, stripped of whitespace, and prefixed with
        '#'. The '#general' tag is always included.
        """
        tags = {"#general"}
        for fragment in text.split(","):
            tag = fragment.strip().lower().replace(" ", "")
            if not tag:
                # BUGFIX: skip empty fragments (e.g. trailing commas or empty
                # model output) instead of producing a bare "#" tag.
                continue
            if not tag.startswith("#"):
                tag = "#" + tag
            tags.add(tag)
        return sorted(tags)

    def __call__(self, x: str) -> list[str]:
        # should not return duplicates
        text = self.template.format(x)
        inputs = self.tokenizer(text, return_tensors="pt")
        outputs = self.model.generate(
            **inputs, generation_config=self.generation_config
        )
        output = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        tags = self._extract_tags(output)
        return tags

    def get_name(self) -> str:
        return f"{self.__class__.__name__}({self.model_name})"