from typing import Any, Dict

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


class EndpointHandler:
    """Hugging Face Inference Endpoint handler for the chuckfinca/arithmephi model.

    Loads the fine-tuned causal-LM weights and the base phi-2 tokenizer once at
    startup, then serves short (8-token) completions per request.
    """

    def __init__(self, path: str = "") -> None:
        """Load model and tokenizer.

        Args:
            path: Unused; kept for the Inference Endpoints handler contract.
        """
        # Prefer GPU but fall back to CPU — the original hard-coded "cuda"
        # and crashed on CPU-only hosts.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        torch.set_default_device(self.device)
        self.model = AutoModelForCausalLM.from_pretrained(
            "chuckfinca/arithmephi",
            torch_dtype="auto",
            trust_remote_code=True,
            device_map=self.device,
        )
        # Tokenizer is taken from the base phi-2 checkpoint the fine-tune
        # was derived from; presumably the fine-tune did not alter the vocab
        # — TODO confirm against the training setup.
        self.tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/phi-2", trust_remote_code=True
        )

    def __call__(self, data: Dict[str, Any]) -> str:
        """Generate up to 8 new tokens for the request's prompt.

        Args:
            data: Request payload; the prompt is read from ``data["inputs"]``,
                falling back to ``data`` itself when that key is absent.

        Returns:
            The decoded generation (prompt + completion) as a single string.
            (The original annotation ``List[Dict[str, Any]]`` was wrong: the
            code returns ``batch_decode(...)[0]``, a ``str``.)
        """
        # Don't shadow the `input` builtin (original did).
        prompt = data.get("inputs", data)
        encoded = self.tokenizer(
            prompt, return_tensors="pt", return_attention_mask=False
        ).to(self.device)
        # Count prompt tokens via the input_ids tensor; integer-indexing the
        # BatchEncoding (original `inputs[0]`) only works with fast tokenizers.
        prompt_len = encoded["input_ids"].shape[-1]
        outputs = self.model.generate(
            **encoded,
            max_length=prompt_len + 8,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        return self.tokenizer.batch_decode(outputs)[0]