Initial commit with model and code

Browse files

Files changed (4) hide show

endpoint.py +28 -0
handler.py +42 -0
model.joblib +3 -0
requirements.txt +3 -0

endpoint.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from huggingface_hub import InferenceClient, create_inference_endpoint
+# Create the inference endpoint.
+# NOTE: the repository path and Docker image URL below are placeholders and
+# must be replaced with real values before running this script.
+endpoint = create_inference_endpoint(
+    name="my-custom-endpoint",
+    repository="path/to/your/model/repository",
+    framework="custom",
+    task="text-classification",
+    accelerator="cpu",  # or "gpu" if needed
+    instance_size="medium",
+    instance_type="c6i",
+    region="us-east-1",
+    # `vendor` is a required keyword-only argument of
+    # create_inference_endpoint; "aws" matches the us-east-1 region above.
+    vendor="aws",
+    custom_image={
+        "health_route": "/healthz",
+        "port": 8080,
+        "url": "your-docker-image-url:latest",
+    },
+)
+# Block until the endpoint has finished deploying and is ready to serve.
+endpoint.wait()
+# Create a client to interact with the endpoint
+client = InferenceClient(endpoint.url)
+# Smoke-test the endpoint with a single input and print the raw response.
+result = client.text_classification("This is a test input")
+print(result)

handler.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import os
+import torch
+from joblib import load
+from transformers import BertTokenizer
+def load_model(model_path):
+    """Deserialize and return the object stored in the joblib file at ``model_path``."""
+    # NOTE: joblib files are pickle-based, so only load files from a trusted source.
+    model = load(model_path)
+    return model
+class EndpointHandler:
+    """Inference handler that classifies text with the bundled model.
+
+    The model is loaded once at construction time from ``model.joblib`` and
+    reused for every request.
+    """
+
+    # Human-readable labels, indexed by predicted class id.
+    # NOTE(review): assumed to match the order of the model's output logits
+    # -- confirm against the training code.
+    CLASS_NAMES = ("JAILBREAK", "INJECTION", "PHISHING", "SAFE")
+
+    def __init__(self, path=""):
+        """Load the model and tokenizer and move the model to the best device.
+
+        Args:
+            path: Directory that contains ``model.joblib``.
+        """
+        self.model = load_model(os.path.join(path, "model.joblib"))
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+        # Switch to evaluation mode so dropout and similar layers are
+        # disabled; otherwise predictions can vary between identical requests.
+        self.model.eval()
+
+    def __call__(self, data):
+        """Classify the text(s) contained in a request payload.
+
+        Args:
+            data: Request payload. The value under the ``"inputs"`` key is
+                used (and removed from ``data``); when the key is absent the
+                payload itself is treated as the input. A single string is
+                wrapped into a one-element list.
+
+        Returns:
+            A dict ``{"predictions": [...]}`` with one
+            ``{"label": str, "score": float}`` entry per input, where
+            ``score`` is the softmax probability of the predicted label.
+        """
+        inputs = data.pop("inputs", data)
+        # Ensure inputs is a list
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        # Nothing to classify: skip the tokenizer and model, which cannot
+        # process an empty batch.
+        if isinstance(inputs, list) and not inputs:
+            return {"predictions": []}
+        # Tokenize inputs; sequences longer than 128 tokens are truncated.
+        encoded_inputs = self.tokenizer(
+            inputs,
+            padding=True,
+            truncation=True,
+            max_length=128,
+            return_tensors="pt",
+        )
+        # Move inputs to the same device as the model
+        input_ids = encoded_inputs['input_ids'].to(self.device)
+        attention_mask = encoded_inputs['attention_mask'].to(self.device)
+        # Perform inference without tracking gradients
+        with torch.no_grad():
+            outputs = self.model(input_ids, attention_mask=attention_mask)
+            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            # Highest probability per row and the class index it belongs to.
+            scores, predictions = probabilities.max(dim=-1)
+        # Convert to plain Python values so the result is JSON-serializable.
+        results = [
+            {"label": self.CLASS_NAMES[class_id], "score": score}
+            for class_id, score in zip(predictions.tolist(), scores.tolist())
+        ]
+        return {"predictions": results}

model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0b21b9462a2a3b8a360588252e9cacf970a03502a4b9f964548d09213295ca2
+size 1122320204

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+transformers
+joblib