# Hugging Face Inference Endpoints custom handler for chuckfinca/arithmephi
# (a fine-tune of microsoft/phi-2). Requires a CUDA-capable GPU.
from typing import Dict, List, Any
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
class EndpointHandler():
    """Custom Inference Endpoints handler serving the chuckfinca/arithmephi model.

    Loads the fine-tuned causal LM onto CUDA at startup and, per request,
    generates up to 8 new tokens continuing the supplied prompt.
    """

    def __init__(self, path=""):
        # `path` is the model directory provided by the endpoint runtime; it is
        # unused here because the weights are pulled directly from the Hub.
        torch.set_default_device("cuda")
        self.model = AutoModelForCausalLM.from_pretrained(
            "chuckfinca/arithmephi",
            torch_dtype="auto",
            trust_remote_code=True,
            device_map="cuda",
        )
        # NOTE(review): tokenizer comes from the base model (microsoft/phi-2),
        # presumably because the fine-tune kept the vocabulary unchanged — confirm.
        self.tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/phi-2", trust_remote_code=True
        )

    def __call__(self, data: Dict[str, Any]) -> str:
        """Run generation for one request.

        Args:
            data: Request payload; the prompt is read from ``data["inputs"]``,
                falling back to ``data`` itself when that key is absent.

        Returns:
            The decoded model output (prompt plus up to 8 generated tokens),
            including any special tokens emitted by the tokenizer.
        """
        # Renamed from `input`, which shadowed the builtin.
        prompt = data.get("inputs", data)
        encoded = self.tokenizer(
            prompt, return_tensors="pt", return_attention_mask=False
        ).to("cuda")
        # Fix: the original used len(inputs[0]), which indexes the BatchEncoding
        # mapping (fast-tokenizer-only behavior) rather than measuring the
        # tokenized prompt. Read the input_ids tensor length directly so that
        # max_length = prompt length + 8 new tokens, for any tokenizer.
        prompt_len = encoded["input_ids"].shape[-1]
        outputs = self.model.generate(
            **encoded,
            max_length=prompt_len + 8,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # batch_decode returns one string per sequence; we generate a single one.
        return self.tokenizer.batch_decode(outputs)[0]