from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


class EndpointHandler:
    def __init__(self, path=""):
        # Load the tokenizer and model from the checkpoint directory the
        # endpoint passes in, and move the model to GPU when one is available.
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data):
        """
        Called when the endpoint receives a request.
        Expected input: { "inputs": "some string" } or { "inputs": ["a", "b", ...] }
        """
        inputs = data.get("inputs", None)

        if inputs is None:
            return {"error": "No input provided"}

        # Accept either a single string or a list of strings.
        if isinstance(inputs, str):
            inputs = [inputs]

        results = []
        for text in inputs:
            # Tokenize one text at a time, padding/truncating to 4096 tokens.
            encoded = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=4096,
            )
            encoded = {k: v.to(self.device) for k, v in encoded.items()}

            # Inference only, so no gradients are needed.
            with torch.no_grad():
                outputs = self.model(**encoded)

            # Single-logit head: squeeze to a scalar and clamp it to [0, 1].
            raw_score = outputs.logits.squeeze().item()
            clipped_score = min(max(raw_score, 0.0), 1.0)

            results.append({"score": round(clipped_score, 4)})

        return results
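

# A minimal local smoke test, separate from the endpoint contract. The
# "./model" path below is a placeholder assumption; point it at a checkpoint
# directory containing a single-logit sequence-classification model.
if __name__ == "__main__":
    handler = EndpointHandler(path="./model")
    print(handler({"inputs": "example text"}))
    print(handler({"inputs": ["first text", "second text"]}))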