Rishabh Shukla committed on
Commit 7c6592f · 2 Parent(s): 6a25952 5d32d9a

Move tf_model.h5 into saved_model folder

Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file into the container
+ COPY requirements.txt .
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application's code into the container
+ COPY . .
+
+ EXPOSE 7860
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "app:app"]
app.py ADDED
@@ -0,0 +1,71 @@
+ from flask import Flask, request, jsonify
+ from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
+ import tensorflow as tf
+ import re
+
+ # Initialize Flask app
+ app = Flask(__name__)
+
+ @app.route('/')
+ def health_check():
+     return jsonify({"status": "healthy"}), 200
+
+ # --- Load Model and Tokenizer ---
+ # Load the fine-tuned model and tokenizer from the saved directory
+ model_path = "./saved_model"
+ tokenizer = DistilBertTokenizer.from_pretrained(model_path)
+ model = TFDistilBertForSequenceClassification.from_pretrained(model_path)
+
+ print("Model and Tokenizer loaded successfully!")
+
+ # --- Helper Function for Text Cleaning ---
+ def clean_text(text):
+     text = text.lower()
+     text = re.sub(r'@[a-zA-Z0-9_]+', '', text)
+     text = re.sub(r'https?://[A-Za-z0-9./]+', '', text)
+     text = re.sub(r'[^a-zA-Z\s]', '', text)
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+ # --- Define the Prediction Endpoint ---
+ @app.route('/predict', methods=['POST'])
+ def predict():
+     try:
+         # Get text from the request's JSON body
+         data = request.get_json()
+         text = data['text']
+
+         if not text:
+             return jsonify({"error": "Text field is required"}), 400
+
+         # Clean the input text
+         cleaned_text = clean_text(text)
+
+         # Tokenize the text
+         inputs = tokenizer(cleaned_text, return_tensors="tf", truncation=True, padding=True, max_length=128)
+
+         # Make a prediction
+         outputs = model(inputs)
+         logits = outputs.logits
+         probabilities = tf.nn.softmax(logits, axis=-1)[0].numpy()
+         prediction = tf.argmax(logits, axis=-1).numpy()[0]
+
+         # Map prediction index to label
+         labels = ['Non-Toxic', 'Toxic (Hate Speech/Offensive)']
+         result_label = labels[prediction]
+         confidence = float(probabilities[prediction])
+
+         # Return the result
+         return jsonify({
+             "text": text,
+             "prediction": result_label,
+             "confidence": f"{confidence:.4f}"
+         })
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ # Run the app
+ if __name__ == '__main__':
+     # Use 0.0.0.0 to make it accessible from other Docker containers
+     app.run(host='0.0.0.0', port=5000)
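
For reference, a minimal client-side sketch of how the two routes above could be exercised once the image is built and running. This assumes the container's port 7860 is published to localhost (e.g. docker run -p 7860:7860 ...) and uses the requests package, which is a client-side assumption and not part of requirements.txt:

import requests

BASE_URL = "http://localhost:7860"  # assumption: container port 7860 published to localhost

# Health check against the root route; app.py returns {"status": "healthy"}
print(requests.get(f"{BASE_URL}/").json())

# Send a piece of text to the /predict endpoint; the JSON response carries
# "text", "prediction" and "confidence" fields, as defined in app.py
resp = requests.post(f"{BASE_URL}/predict", json={"text": "Example tweet to classify"})
print(resp.json())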
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ flask
+ gunicorn
+ tensorflow
+ transformers
+ pandas
+ numpy
+ python-dotenv
+ tf-keras
tf_model.h5 → saved_model/config.json RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2629205c79aa8482c01c125732f0f96e7f6b42d8edec1cacf176ac5376669470
- size 267955144
+ oid sha256:19ce9f2958fffb779413a12f67a5ae4540cc136ffcebabdce3f5cecae1a4e1b3
+ size 492
saved_model/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
+ size 125
saved_model/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7aea1a75760cc3312ad93f27a6119976b2046382760286eb80dee50978c02d7
+ size 1278
saved_model/vocab.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
+ size 231508
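
The saved_model/ files touched by this commit (config.json plus the tokenizer files, alongside the tf_model.h5 weights referenced in the commit message) match the layout that transformers writes when a fine-tuned TF model and its tokenizer are exported with save_pretrained. A minimal sketch of that export step, assuming the fine-tuning happened elsewhere and that distilbert-base-uncased was the base checkpoint (both assumptions, not confirmed by this commit):

from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification

# Assumption: a DistilBERT model already fine-tuned for the 2-label toxicity task
model = TFDistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Writes config.json and tf_model.h5 into ./saved_model
model.save_pretrained("./saved_model")
# Writes vocab.txt, tokenizer_config.json and special_tokens_map.json into ./saved_model
tokenizer.save_pretrained("./saved_model")

app.py then reloads both pieces with from_pretrained("./saved_model") at startup, which is why the weights and tokenizer files live together in this folder.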