import os

import joblib
import numpy as np
import torch
import torch.nn.functional as F
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast


class EndpointHandler:
    def __init__(self, model_dir):
        # Load the fine-tuned DistilBERT classifier and its tokenizer from the model directory.
        self.model = DistilBertForSequenceClassification.from_pretrained(model_dir)
        self.tokenizer = DistilBertTokenizerFast.from_pretrained(model_dir)
        # label_mapping.joblib maps label names to class indices; invert it for index -> name lookups.
        self.label_mapping = joblib.load(os.path.join(model_dir, "label_mapping.joblib"))
        self.labels = {v: k for k, v in self.label_mapping.items()}

    def __call__(self, inputs):
        # Accept either the endpoint payload ({"inputs": ...}) or a raw string.
        if isinstance(inputs, dict) and "inputs" in inputs:
            return self.predict(inputs["inputs"])
        return self.predict(inputs)

    def predict(self, text):
        # Very short inputs are treated as safe without running the model.
        if len(text.split()) < 4:
            return {"label": "SAFE", "score": 1.0}

        encoded_input = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
        with torch.no_grad():
            outputs = self.model(**encoded_input)

        # Convert logits to probabilities and report the top class with its confidence.
        probabilities = F.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
        confidence = np.max(probabilities)
        predicted_label_idx = int(np.argmax(probabilities))
        predicted_label = self.labels[predicted_label_idx]
        return {"label": predicted_label, "score": float(confidence)}


def get_pipeline():
    # Return the handler class so callers can instantiate it with a model directory.
    return EndpointHandler
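

# --- Usage sketch (assumption, not part of the deployed handler) ---
# A minimal local smoke test of EndpointHandler, assuming the model directory
# ("./model" here is a placeholder path) contains the fine-tuned DistilBERT
# weights, tokenizer files, and label_mapping.joblib described above.
if __name__ == "__main__":
    handler = EndpointHandler("./model")
    # The handler accepts the endpoint-style payload ({"inputs": ...}) or a raw string.
    print(handler({"inputs": "Example text to classify for safety."}))
    print(handler("Another example sentence passed as a plain string."))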