import os.path
from typing import Any, Dict, List

import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin
from tokenizers import ByteLevelBPETokenizer
from transformers import pipeline

embedding_dim = 128
rnn_units = 256
vocab_size = 8000


def get_model():
    class AurelioRNN(nn.Module, PyTorchModelHubMixin):
        def __init__(self, config: dict):
            super().__init__()
            vocab_size = config.get("vocab_size")
            embedding_dim = config.get("embedding_dim")
            rnn_units = config.get("rnn_units")
            self.config = config
            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            self.lstm = nn.LSTM(embedding_dim, rnn_units, batch_first=True)
            self.fc = nn.Linear(rnn_units, vocab_size)

        def forward(self, x, state):
            x = self.embedding(x)
            x, state = self.lstm(x, state)
            x = self.fc(x)
            return x, state

        def init_state(self, batch_size):
            # Single-layer LSTM state: (h0, c0), each (num_layers=1, batch, rnn_units).
            # Read the width from self.config rather than the module-level global,
            # so the state always matches the units this instance was built with.
            return (
                torch.zeros(1, batch_size, self.config["rnn_units"]).to("cpu"),
                torch.zeros(1, batch_size, self.config["rnn_units"]).to("cpu"),
            )

    return AurelioRNN


class EndpointHandler:
    def __init__(self, path=""):
        # Load the model weights from the Hub.
        config = {
            "vocab_size": vocab_size,
            "embedding_dim": embedding_dim,
            "rnn_units": rnn_units,
        }
        lstm = get_model()
        model = lstm.from_pretrained("jed-tiotuico/aurelio-rnn", config=config)

        # Load the BPE tokenizer files shipped alongside this handler.
        dir_path = os.path.abspath(os.path.dirname(__file__))
        tokenizer = ByteLevelBPETokenizer(
            os.path.join(dir_path, "aurelio_bpe-vocab.json"),
            os.path.join(dir_path, "aurelio_bpe-merges.txt"),
        )

        # Create the inference pipeline.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> List[Dict[str, Any]]:
        """
        Args:
            data (:obj:`dict`): includes the input data and the parameters for
                the inference.
        Return:
            A :obj:`list` of :obj:`dict` as produced by the text-generation
            pipeline, e.g. ``[{"generated_text": "..."}]``, one entry per
            returned sequence.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Pass inputs along with any generation kwargs supplied in the payload.
        if parameters is not None:
            prediction = self.pipeline(inputs, **parameters)
        else:
            prediction = self.pipeline(inputs)

        # Postprocess the prediction.
        return prediction
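
# --- Usage sketch (hypothetical; not part of the deployed handler) ---
# A minimal local smoke test, assuming the tokenizer files sit next to this
# module and the Hub checkpoint "jed-tiotuico/aurelio-rnn" is reachable.
# Inference Endpoints invoke EndpointHandler.__call__ with a JSON payload of
# the form {"inputs": ..., "parameters": {...}}; the prompt string and the
# generation parameters below are illustrative placeholders.
if __name__ == "__main__":
    handler = EndpointHandler()
    payload = {
        "inputs": "Kumusta",  # placeholder prompt
        "parameters": {"max_new_tokens": 32},  # forwarded to the pipeline
    }
    print(handler(payload))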