import os.path
from typing import Any, Dict, List

import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin
from tokenizers import ByteLevelBPETokenizer
from transformers import pipeline

embedding_dim = 128
rnn_units = 256
vocab_size = 8000


def get_model():
    class AurelioRNN(nn.Module, PyTorchModelHubMixin):
        def __init__(self, config: dict):
            super().__init__()
            vocab_size = config.get("vocab_size")
            embedding_dim = config.get("embedding_dim")
            rnn_units = config.get("rnn_units")
            self.config = config
            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            self.lstm = nn.LSTM(embedding_dim, rnn_units, batch_first=True)
            self.fc = nn.Linear(rnn_units, vocab_size)

        def forward(self, x, state):
            x = self.embedding(x)
            x, state = self.lstm(x, state)
            x = self.fc(x)
            return x, state

        def init_state(self, batch_size):
            # Single-layer LSTM state: (h0, c0), each (num_layers=1, batch, rnn_units).
            # Read the width from self.config rather than the module-level global,
            # so the state always matches the units this instance was built with.
            return (
                torch.zeros(1, batch_size, self.config["rnn_units"]).to("cpu"),
                torch.zeros(1, batch_size, self.config["rnn_units"]).to("cpu"),
            )

    return AurelioRNN


class EndpointHandler:
    def __init__(self, path=""):
        # Load the model weights from the Hub.
        config = {
            "vocab_size": vocab_size,
            "embedding_dim": embedding_dim,
            "rnn_units": rnn_units,
        }
        lstm = get_model()
        model = lstm.from_pretrained("jed-tiotuico/aurelio-rnn", config=config)

        # Load the BPE tokenizer files shipped alongside this handler.
        dir_path = os.path.abspath(os.path.dirname(__file__))
        tokenizer = ByteLevelBPETokenizer(
            os.path.join(dir_path, "aurelio_bpe-vocab.json"),
            os.path.join(dir_path, "aurelio_bpe-merges.txt"),
        )

        # Create the inference pipeline.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> List[Dict[str, Any]]:
        """
        Args:
            data (:obj:`dict`): includes the input data and the parameters for
                the inference.
        Return:
            A :obj:`list` of :obj:`dict` as produced by the text-generation
            pipeline, e.g. ``[{"generated_text": "..."}]``, one entry per
            returned sequence.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Pass inputs along with any generation kwargs supplied in the payload.
        if parameters is not None:
            prediction = self.pipeline(inputs, **parameters)
        else:
            prediction = self.pipeline(inputs)

        # Postprocess the prediction.
        return prediction
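
# --- Usage sketch (hypothetical; not part of the deployed handler) ---
# A minimal local smoke test, assuming the tokenizer files sit next to this
# module and the Hub checkpoint "jed-tiotuico/aurelio-rnn" is reachable.
# Inference Endpoints invoke EndpointHandler.__call__ with a JSON payload of
# the form {"inputs": ..., "parameters": {...}}; the prompt string and the
# generation parameters below are illustrative placeholders.
if __name__ == "__main__":
    handler = EndpointHandler()
    payload = {
        "inputs": "Kumusta",  # placeholder prompt
        "parameters": {"max_new_tokens": 32},  # forwarded to the pipeline
    }
    print(handler(payload))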