menimeni123
committed on
Commit
·
924a7c9
1
Parent(s):
d9bdc12
Initial commit with model and code
Browse files- endpoint.py +28 -0
- handler.py +42 -0
- model.joblib +3 -0
- requirements.txt +3 -0
endpoint.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import InferenceClient, create_inference_endpoint
|
2 |
+
|
3 |
+
# Create the inference endpoint.
# `vendor` is a required keyword-only argument of create_inference_endpoint;
# omitting it raises a TypeError before any request is sent. "us-east-1" is
# an AWS region, so the matching vendor is "aws".
endpoint = create_inference_endpoint(
    name="my-custom-endpoint",
    repository="path/to/your/model/repository",
    framework="custom",
    task="text-classification",
    accelerator="cpu",  # or "gpu" if needed
    instance_size="medium",
    instance_type="c6i",
    region="us-east-1",
    vendor="aws",
    custom_image={
        "health_route": "/healthz",
        "port": 8080,
        "url": "your-docker-image-url:latest",
    },
)

# Block until the endpoint has finished deploying.
endpoint.wait()

# Create a client bound to the deployed endpoint's URL.
client = InferenceClient(endpoint.url)

# Smoke-test the endpoint with a single input.
# NOTE(review): text_classification() parses a list-shaped response; confirm
# the deployed handler's response shape is compatible.
result = client.text_classification("This is a test input")
print(result)
|
handler.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
from joblib import load
|
4 |
+
from transformers import BertTokenizer
|
5 |
+
|
6 |
+
def load_model(model_path):
    """Deserialize and return the object stored at *model_path* via joblib.

    NOTE(review): joblib files are pickle-based, so loading one can execute
    arbitrary code; only point this at artifacts from a trusted source.
    """
    model = load(model_path)
    return model
|
8 |
+
|
9 |
+
class EndpointHandler:
    """Callable request handler that classifies text with a joblib-stored model.

    The model is loaded from ``model.joblib`` under *path*, inputs are
    tokenized with the ``bert-base-uncased`` tokenizer, and each input is
    mapped to one of ``CLASS_NAMES`` together with its softmax score.
    """

    # Index -> label mapping for the model's output logits.
    CLASS_NAMES = ("JAILBREAK", "INJECTION", "PHISHING", "SAFE")

    def __init__(self, path=""):
        """Load the model and tokenizer and move the model to the device.

        Args:
            path: Directory that contains ``model.joblib``.
        """
        # NOTE(review): load_model uses joblib (pickle-based); the artifact
        # must come from a trusted source.
        self.model = load_model(os.path.join(path, "model.joblib"))
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        # Inference only: switch off dropout and similar training-time
        # behaviour so scores are deterministic.
        self.model.eval()

    def __call__(self, data):
        """Classify the text(s) found under ``data["inputs"]``.

        Args:
            data: Request payload. The ``"inputs"`` key (removed from *data*
                if present) holds a string or a list of strings; when the key
                is absent, *data* itself is used as the input.

        Returns:
            ``{"predictions": [{"label": str, "score": float}, ...]}`` with
            one entry per input, in input order.
        """
        inputs = data.pop("inputs", data)

        # Ensure inputs is a list
        if isinstance(inputs, str):
            inputs = [inputs]

        # Nothing to classify: skip tokenization, which cannot build a
        # tensor batch from an empty list.
        if isinstance(inputs, (list, tuple)) and not inputs:
            return {"predictions": []}

        # Tokenize inputs
        encoded_inputs = self.tokenizer(
            inputs,
            padding=True,
            truncation=True,
            max_length=128,
            return_tensors="pt",
        )

        # Move inputs to the correct device
        input_ids = encoded_inputs['input_ids'].to(self.device)
        attention_mask = encoded_inputs['attention_mask'].to(self.device)

        # Perform inference
        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask=attention_mask)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # Highest probability and its class index for every row at once.
            scores, indices = probabilities.max(dim=-1)

        # Convert predictions to human-readable labels
        results = [
            {"label": self.CLASS_NAMES[index], "score": score}
            for index, score in zip(indices.tolist(), scores.tolist())
        ]

        return {"predictions": results}
|
model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0b21b9462a2a3b8a360588252e9cacf970a03502a4b9f964548d09213295ca2
|
3 |
+
size 1122320204
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
transformers
|
3 |
+
joblib
|