menimeni123 committed on
Commit
924a7c9
·
1 Parent(s): d9bdc12

Initial commit with model and code

Browse files
Files changed (4) hide show
  1. endpoint.py +28 -0
  2. handler.py +42 -0
  3. model.joblib +3 -0
  4. requirements.txt +3 -0
endpoint.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient, create_inference_endpoint
2
+
3
# Create the inference endpoint.
# NOTE: `name`, `repository` and the custom image URL below are placeholders
# and must be replaced with real values before this script is run.
endpoint = create_inference_endpoint(
    name="my-custom-endpoint",
    repository="path/to/your/model/repository",
    framework="custom",
    task="text-classification",
    accelerator="cpu",  # or "gpu" if needed
    instance_size="medium",
    instance_type="c6i",
    region="us-east-1",
    # `vendor` is a required keyword-only argument of
    # create_inference_endpoint; omitting it raises TypeError before any
    # request is sent. "us-east-1" is an AWS region, hence "aws".
    vendor="aws",
    custom_image={
        "health_route": "/healthz",
        "port": 8080,
        "url": "your-docker-image-url:latest",
    },
)

# Wait for the endpoint to be ready
endpoint.wait()

# Create a client to interact with the endpoint
client = InferenceClient(endpoint.url)

# Test the endpoint
result = client.text_classification("This is a test input")
print(result)
handler.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from joblib import load
4
+ from transformers import BertTokenizer
5
+
6
def load_model(model_path):
    """Deserialize and return the object stored at ``model_path`` via joblib."""
    model = load(model_path)
    return model
8
+
9
class EndpointHandler:
    """Callable request handler that classifies text with the bundled model.

    The handler loads ``model.joblib`` from the given directory once at
    construction time and is then invoked with a request payload for every
    prediction request.
    """

    # Labels looked up by the arg-max index of the model's logits.
    CLASS_NAMES = ("JAILBREAK", "INJECTION", "PHISHING", "SAFE")
    # Maximum number of tokens kept per input; longer inputs are truncated.
    MAX_LENGTH = 128

    def __init__(self, path=""):
        """Load the model and tokenizer and move the model to the device.

        Args:
            path: Directory that contains ``model.joblib``.
        """
        self.model = load_model(os.path.join(path, "model.joblib"))
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        # An unpickled module keeps the train/eval flag it was saved with.
        # Force eval mode so layers such as dropout are disabled at inference.
        # (Assumes the object is a torch.nn.Module, which `.to` above already
        # requires.)
        self.model.eval()

    def __call__(self, data):
        """Classify the text(s) contained in a request payload.

        Args:
            data: Request dict. The value under ``"inputs"`` is classified;
                it may be a single string or a list of strings. If the key
                is absent, the payload itself is used as the input. The
                ``"inputs"`` key is removed from ``data``.

        Returns:
            ``{"predictions": [...]}`` with one ``{"label", "score"}`` dict
            per input, where ``score`` is the softmax probability of the
            predicted label.
        """
        inputs = data.pop("inputs", data)

        # Ensure inputs is a list
        if isinstance(inputs, str):
            inputs = [inputs]

        # Nothing to classify: skip the tokenizer/model, which are not
        # guaranteed to accept an empty batch.
        if isinstance(inputs, (list, tuple)) and not inputs:
            return {"predictions": []}

        # Tokenize inputs
        encoded_inputs = self.tokenizer(
            inputs,
            padding=True,
            truncation=True,
            max_length=self.MAX_LENGTH,
            return_tensors="pt",
        )

        # Move inputs to the correct device
        input_ids = encoded_inputs["input_ids"].to(self.device)
        attention_mask = encoded_inputs["attention_mask"].to(self.device)

        # Perform inference without tracking gradients
        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask=attention_mask)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # The highest probability and its index give the score and label.
            top_scores, top_indices = probabilities.max(dim=-1)

        # Convert predictions to human-readable labels (plain Python values)
        results = [
            {"label": self.CLASS_NAMES[index], "score": score}
            for index, score in zip(top_indices.tolist(), top_scores.tolist())
        ]

        return {"predictions": results}
model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b21b9462a2a3b8a360588252e9cacf970a03502a4b9f964548d09213295ca2
3
+ size 1122320204
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ joblib