menimeni123
/

helem-llm

Joblib

Safetensors

distilbert

Inference Endpoints

Model card | Files and versions | Community

menimeni123 committed on Sep 12, 2024

Commit

5237bb2

•

1 Parent(s): fc9cdc9

latest

Browse files

Files changed (3) hide show

app.py +57 -47
config.json +0 -15
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,55 +1,65 @@
-import torch
 import joblib
-from flask import Flask, request, jsonify
 from transformers import BertTokenizer, BertForSequenceClassification
-import torch.nn.functional as F
-# Initialize Flask application
-app = Flask(__name__)
-# Load model and tokenizer
-model = joblib.load('model.joblib')
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model.eval()
-# Set device to CUDA if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
-# Inference function
-def classify_text(text):
-    encoding = tokenizer(str(text), truncation=True, padding=True, max_length=128, return_tensors='pt')
-    input_ids = encoding['input_ids'].to(device)
-    attention_mask = encoding['attention_mask'].to(device)
-    with torch.no_grad():
-        outputs = model(input_ids, attention_mask=attention_mask)
-        logits = outputs.logits
-        probabilities = F.softmax(logits, dim=-1)
-        confidence, predicted_class = torch.max(probabilities, dim=-1)
-    class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
-    predicted_label = class_names[predicted_class.item()]
-    confidence_score = confidence.item()
-    return predicted_label, confidence_score
-# Define the inference route
-@app.route('/inference', methods=['POST'])
-def inference():
-    data = request.json
-    if 'text' not in data:
-        return jsonify({"error": "No text provided"}), 400
-    text = data['text']
-    label, confidence = classify_text(text)
-    return jsonify({
-        'text': text,
-        'classification': label,
-        'confidence': confidence
-    })
-# Start the Flask server
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=8080)

+# app.py
+import os
 import joblib
+import torch
 from transformers import BertTokenizer, BertForSequenceClassification
+from torch.nn.functional import softmax
+# Load the tokenizer and model
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+# Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the saved model
+model = joblib.load('model.joblib')
 model.to(device)
+model.eval()
+# Class names
+class_names = ["JAILBREAK", "INJECTION", "PHISHING", "SAFE"]
+def preprocess(text):
+    # Tokenize the input text
+    encoding = tokenizer(
+        text,
+        truncation=True,
+        padding=True,
+        max_length=128,
+        return_tensors='pt'
+    )
+    return encoding
+def inference(model_inputs):
+    """
+    This function will be called for every inference request.
+    """
+    try:
+        # Get the text input
+        text = model_inputs.get('text', None)
+        if text is None:
+            return {'message': 'No text provided for inference.'}
+        # Preprocess the text
+        encoding = preprocess(text)
+        input_ids = encoding['input_ids'].to(device)
+        attention_mask = encoding['attention_mask'].to(device)
+        # Perform inference
+        with torch.no_grad():
+            outputs = model(input_ids, attention_mask=attention_mask)
+            logits = outputs.logits
+            probabilities = softmax(logits, dim=-1)
+            confidence, predicted_class = torch.max(probabilities, dim=-1)
+        # Prepare the response
+        predicted_label = class_names[predicted_class.item()]
+        confidence_score = confidence.item()
+        return {
+            'classification': predicted_label,
+            'confidence': confidence_score
+        }
+    except Exception as e:
+        return {'error': str(e)}

config.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-    "model_type": "bert",
-    "num_labels": 4,
-    "hidden_size": 768,
-    "vocab_size": 30522,
-    "hidden_act": "gelu",
-    "initializer_range": 0.02,
-    "layer_norm_eps": 1e-12,
-    "max_position_embeddings": 512,
-    "type_vocab_size": 2,
-    "attention_probs_dropout_prob": 0.1,
-    "hidden_dropout_prob": 0.1,
-    "intermediate_size": 3072
-  }

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 torch
-transformers=4.44.2
 joblib

+# requirements.txt
 torch
+transformers
 joblib