|
from typing import Dict, List, Any |
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
class EndpointHandler():
    """Hugging Face Inference Endpoints custom handler for the
    ``chuckfinca/arithmephi`` causal LM (a phi-2 fine-tune).

    Loads the model and tokenizer once at startup and serves short
    (8-new-token) completions per request.
    """

    def __init__(self, path=""):
        """Load model and tokenizer onto the GPU.

        Args:
            path: Local model directory provided by the endpoint runtime.
                NOTE(review): currently ignored — weights are always pulled
                from the Hub by repo id. Confirm this is intended.
        """
        # All new tensors default to CUDA; the handler assumes a GPU endpoint.
        torch.set_default_device("cuda")
        self.model = AutoModelForCausalLM.from_pretrained(
            "chuckfinca/arithmephi",
            torch_dtype="auto",
            trust_remote_code=True,
            device_map="cuda",
        )
        # The fine-tune reuses the base phi-2 tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/phi-2", trust_remote_code=True
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate a completion for the request payload.

        Args:
            data: Request body; the prompt is read from ``data["inputs"]``,
                falling back to ``data`` itself when the key is absent.

        Returns:
            A one-element list ``[{"generated_text": <decoded output>}]`` —
            the list-of-dicts shape Inference Endpoints expect (and that the
            annotation declares; the previous code returned a bare str).
        """
        prompt = data.get("inputs", data)  # renamed: `input` shadowed the builtin
        encoded = self.tokenizer(
            prompt, return_tensors="pt", return_attention_mask=False
        ).to("cuda")
        # Prompt length from the input_ids tensor: the previous len(inputs[0])
        # only works with fast tokenizers (int-indexing a BatchEncoding).
        prompt_len = encoded["input_ids"].shape[-1]
        outputs = self.model.generate(
            **encoded,
            # Cap total length at prompt + 8 new tokens, as before.
            max_length=prompt_len + 8,
            # phi-2 has no pad token; reuse EOS to silence the generate warning.
            pad_token_id=self.tokenizer.eos_token_id,
        )
        text = self.tokenizer.batch_decode(outputs)[0]
        return [{"generated_text": text}]