Spaces:

shuklaRishabh
/

hate-speech-api

Sleeping

App Files Files Community

Rishabh Shukla committed on Sep 28

Commit

7c6592f

2 Parent(s): 6a25952 5d32d9a

Move tf_model.h5 into saved_model folder

Browse files

Files changed (7) hide show

Dockerfile +17 -0
app.py +71 -0
requirements.txt +8 -0
tf_model.h5 → saved_model/config.json +2 -2
saved_model/special_tokens_map.json +3 -0
saved_model/tokenizer_config.json +3 -0
saved_model/vocab.txt +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file into the container
+# (copied on its own, before the rest of the code, so the install step below
+# only depends on requirements.txt)
+COPY requirements.txt .
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application's code into the container
+COPY . .
+# Document the port the server listens on (must match the --bind below)
+EXPOSE 7860
+# Serve the Flask object `app` from module `app` with gunicorn on all interfaces.
+# NOTE(review): a single worker presumably keeps only one copy of the model in
+# memory -- confirm before raising the worker count.
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from flask import Flask, request, jsonify
+from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
+import tensorflow as tf
+import re
+# Initialize Flask app
+app = Flask(__name__)
+@app.route('/')
+def health_check():
+    return jsonify({"status": "healthy"}), 200
+# --- Load Model and Tokenizer ---
+# Load the fine-tuned model and tokenizer from the saved directory
+# Local directory passed to both from_pretrained() calls below.
+model_path = "./saved_model"
+# Both objects are created at import time, so they are built once per process
+# and shared by every request handled by predict().
+tokenizer = DistilBertTokenizer.from_pretrained(model_path)
+model = TFDistilBertForSequenceClassification.from_pretrained(model_path)
+print("Model and Tokenizer loaded successfully!")
+# --- Helper Function for Text Cleaning ---
+def clean_text(text):
+    text = text.lower()
+    text = re.sub(r'@[a-zA-Z0-9_]+', '', text)
+    text = re.sub(r'https?://[A-Za-z0-9./]+', '', text)
+    text = re.sub(r'[^a-zA-Z\s]', '', text)
+    text = re.sub(r'\s+', ' ', text).strip()
+    return text
+# --- Define the Prediction Endpoint ---
+@app.route('/predict', methods=['POST'])
+def predict():
+    try:
+        # Get text from the request's JSON body
+        data = request.get_json()
+        text = data['text']
+        if not text:
+            return jsonify({"error": "Text field is required"}), 400
+        # Clean the input text
+        cleaned_text = clean_text(text)
+        # Tokenize the text
+        inputs = tokenizer(cleaned_text, return_tensors="tf", truncation=True, padding=True, max_length=128)
+        # Make a prediction
+        outputs = model(inputs)
+        logits = outputs.logits
+        probabilities = tf.nn.softmax(logits, axis=-1)[0].numpy()
+        prediction = tf.argmax(logits, axis=-1).numpy()[0]
+        # Map prediction index to label
+        labels = ['Non-Toxic', 'Toxic (Hate Speech/Offensive)']
+        result_label = labels[prediction]
+        confidence = float(probabilities[prediction])
+        # Return the result
+        return jsonify({
+            "text": text,
+            "prediction": result_label,
+            "confidence": f"{confidence:.4f}"
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Run the app
+if __name__ == '__main__':
+    # Only reached when this file is executed directly (e.g. `python app.py`).
+    # Use 0.0.0.0 to make it accessible from other Docker containers
+    # NOTE(review): this port (5000) differs from the 7860 that the Dockerfile's
+    # gunicorn CMD binds and EXPOSEs -- confirm that is intentional.
+    app.run(host='0.0.0.0', port=5000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+flask
+gunicorn
+tensorflow
+transformers
+pandas
+numpy
+python-dotenv
+tf-keras

tf_model.h5 → saved_model/config.json RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2629205c79aa8482c01c125732f0f96e7f6b42d8edec1cacf176ac5376669470
-size 267955144

 version https://git-lfs.github.com/spec/v1
+oid sha256:19ce9f2958fffb779413a12f67a5ae4540cc136ffcebabdce3f5cecae1a4e1b3
+size 492

saved_model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
+size 125

saved_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7aea1a75760cc3312ad93f27a6119976b2046382760286eb80dee50978c02d7
+size 1278

saved_model/vocab.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
+size 231508