Commit 7f7dfaf
Parent(s): 924a7c9

Updated handler implementation

handler.py CHANGED (+96 -32)
@@ -1,42 +1,106 @@

Before:

import os
import torch
from joblib import load
from transformers import BertTokenizer

def load_model(model_path):
    ...

class EndpointHandler:
    def __init__(self, path=""):
        ...

    def __call__(self, data):
        ...
After:

import os
import math  # needed for math.sqrt in the attention scaling below
import torch
from joblib import load
from transformers import BertTokenizer, BertModel
from transformers.models.bert.modeling_bert import BertSelfAttention

class BertSdpaSelfAttention(BertSelfAttention):
    def __init__(self, config):
        super().__init__(config)
        # Add any custom initialization here
        self.sdpa_head = torch.nn.Linear(config.hidden_size, config.hidden_size)

    def forward(self, hidden_states, attention_mask=None, head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, past_key_value=None, output_attentions=False):
        # Custom forward pass
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        if attention_mask is not None:
            attention_scores = attention_scores + attention_mask

        attention_probs = torch.nn.functional.softmax(attention_scores, dim=-1)
        attention_probs = self.dropout(attention_probs)

        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        # Apply SDPA head
        sdpa_output = self.sdpa_head(context_layer)

        outputs = (sdpa_output, attention_probs) if output_attentions else (sdpa_output,)
        return outputs

# Register the custom class
setattr(torch.nn.modules, 'BertSdpaSelfAttention', BertSdpaSelfAttention)

def load_model(model_path):
    try:
        return load(model_path)
    except AttributeError as e:
        print(f"Error loading model: {e}")
        print("Ensure all custom classes are properly defined.")
        raise

class EndpointHandler:
    def __init__(self, path=""):
        try:
            # Load the model in the __init__ method
            model_path = os.path.join(path, "model.joblib")
            self.model = load_model(model_path)
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self.model.to(self.device)
        except Exception as e:
            print(f"Error initializing EndpointHandler: {e}")
            raise

    def __call__(self, data):
        try:
            inputs = data.pop("inputs", data)

            # Ensure inputs is a list
            if isinstance(inputs, str):
                inputs = [inputs]

            # Tokenize inputs
            encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, max_length=128, return_tensors="pt")

            # Move inputs to the correct device
            input_ids = encoded_inputs['input_ids'].to(self.device)
            attention_mask = encoded_inputs['attention_mask'].to(self.device)

            # Perform inference
            with torch.no_grad():
                outputs = self.model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                probabilities = torch.nn.functional.softmax(logits, dim=-1)
                predictions = torch.argmax(probabilities, dim=-1)

            # Convert predictions to human-readable labels
            class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
            results = [{"label": class_names[pred], "score": prob[pred].item()} for pred, prob in zip(predictions, probabilities)]

            return {"predictions": results}
        except Exception as e:
            print(f"Error during inference: {e}")
            return {"error": str(e)}

# For local testing
if __name__ == "__main__":
    handler = EndpointHandler()
    test_input = {"inputs": "This is a test input"}
    result = handler(test_input)
    print(result)
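The commit registers BertSdpaSelfAttention under torch.nn.modules, presumably so that unpickling model.joblib can resolve the class, but the handler itself never installs it into a model; that wiring would have happened when the artifact was produced. As a minimal sketch of how such a swap could look, the snippet below patches the self-attention modules of a stock BertModel with the custom class. The patch_self_attention helper and the bert-base-uncased checkpoint are illustrative assumptions, not part of this commit.

# Minimal sketch (not part of this commit): install BertSdpaSelfAttention into a
# BERT encoder. Assumes handler.py is importable from the working directory.
import torch
from transformers import BertModel, BertTokenizer
from handler import BertSdpaSelfAttention

def patch_self_attention(bert_model):
    """Hypothetical helper: replace each encoder layer's self-attention module."""
    for layer in bert_model.encoder.layer:
        custom_attn = BertSdpaSelfAttention(bert_model.config)
        # Reuse the pretrained query/key/value weights; the extra sdpa_head
        # stays randomly initialized (strict=False tolerates the missing keys).
        custom_attn.load_state_dict(layer.attention.self.state_dict(), strict=False)
        layer.attention.self = custom_attn
    return bert_model

if __name__ == "__main__":
    # bert-base-uncased is only a stand-in for the repository's real checkpoint.
    model = patch_self_attention(BertModel.from_pretrained("bert-base-uncased"))
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    encoded = tokenizer("This is a test input", return_tensors="pt")
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    print(hidden.shape)  # e.g. torch.Size([1, 7, 768])

A model patched along these lines and then saved with joblib.dump would match the AttributeError guard in load_model, which surfaces exactly the case where the custom class is not defined at load time.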