Spaces:
Sleeping
Sleeping
lostUchiha
committed on
Commit
•
a3e6572
1
Parent(s):
35699f3
app
Browse files
- app.py +100 -0
- spam_detection_model/config.json +23 -0
- spam_detection_model/tf_model.h5 +3 -0
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizerFast
|
3 |
+
import gradio as gr
|
4 |
+
import random
|
5 |
+
|
6 |
+
# Load the model architecture and weights
|
7 |
+
model = TFDistilBertForSequenceClassification.from_pretrained("spam_detection_model/")
|
8 |
+
|
9 |
+
# Load the tokenizer
|
10 |
+
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
|
11 |
+
|
12 |
+
|
13 |
+
# Define the process_input function
|
14 |
+
def process_input(text):
    """Classify a message as spam or not spam.

    Tokenizes ``text`` with the module-level ``tokenizer``, runs the
    module-level ``model`` on the result, and turns the logits into
    softmax probabilities.

    Args:
        text: The message to classify.

    Returns:
        A two-element list, in the order the Gradio outputs are declared:
        a dict mapping "Spam" and "Not a Spam" to their probabilities
        (as floats), and an HTML snippet describing the predicted class.
    """
    encodings = tokenizer(text, truncation=True, padding=True, return_tensors="tf")

    # Forward the whole encoding rather than only input_ids, so the
    # attention mask produced by the tokenizer is honoured whenever the
    # input contains padding.
    logits = model(**encodings).logits

    # Convert once to NumPy; row 0 is the single message in the batch.
    probabilities = tf.nn.softmax(logits, axis=-1).numpy()[0]
    predicted_class = int(probabilities.argmax())

    # Class index -> HTML shown in the output panel.
    label_mapping = {
        0: '<b><div style="font-size:16px; text-align:center;">No need to worry, Not a spam message.</div></b>',
        1: '<b><div style="font-size:16px; color:#ff3b5c; text-align:center;">Warning⚠️: This message has been identified as spam.</div></b>',
    }

    return [
        {
            "Spam": float(probabilities[1]),
            "Not a Spam": float(probabilities[0]),
        },
        label_mapping[predicted_class],
    ]
|
41 |
+
|
42 |
+
|
43 |
+
# Define the Gradio interface
|
44 |
+
title = "Spam Detector⚠️"
|
45 |
+
examples = [
|
46 |
+
"Dear Customer, Your account has been compromised. Click the link below to verify your account details immediately or risk suspension. **(Example 1)**",
|
47 |
+
"You've been selected as the lucky winner of our international sweepstakes! To claim your prize, reply with your full name, address, and bank details. <font color='blue' style='background-color: lightgray;'>(Example 2)</font>",
|
48 |
+
"Congratulations! You've won a free iPhone X. Click the link to claim your prize.",
|
49 |
+
"URGENT: Your bank account has been compromised. Click here to reset your password.",
|
50 |
+
"Get rich quick! Invest in our exclusive program and earn thousands overnight.",
|
51 |
+
"Your prescription refill is ready for pickup at your local pharmacy. Visit us at your convenience",
|
52 |
+
"Reminder: Your monthly utility bill is due on August 20th. Please make the payment.",
|
53 |
+
"You've been selected as the lucky winner of a million-dollar lottery. Reply to claim.",
|
54 |
+
"Limited time offer: Double your money with our amazing investment opportunity.",
|
55 |
+
"Hi, just checking in to see how you're doing. Let's catch up soon.",
|
56 |
+
"Reminder: Your dentist appointment is scheduled for tomorrow at 2 PM.",
|
57 |
+
"Invitation: Join us for a webinar on digital marketing strategies. Register now!",
|
58 |
+
"Your application for the scholarship has been reviewed. We're pleased to inform you that you've been selected.",
|
59 |
+
"Hi there! Just wanted to check in and see how you're doing.",
|
60 |
+
"Reminder: Your friend's birthday is coming up. Don't forget to send them a message.",
|
61 |
+
"Thank you for your purchase. Your order has been successfully processed.",
|
62 |
+
"Your monthly newsletter is here! Stay updated with the latest news and updates.",
|
63 |
+
"Invitation: Join us for a community clean-up event this weekend. Let's make a difference together.",
|
64 |
+
"Reminder: Your scheduled appointment is tomorrow. We look forward to seeing you.",
|
65 |
+
"Good news! You've earned a reward for your loyalty. Check your account for details.",
|
66 |
+
"Your recent transaction has been approved. Please keep this email for your records.",
|
67 |
+
"Exciting announcement: Our new store location is now open. Visit us and receive a special discount.",
|
68 |
+
"Welcome to our online community! Here's how to get started and connect with others.",
|
69 |
+
"Your request has been received and is being processed. We'll update you with the status soon.",
|
70 |
+
"Upcoming event: Join us for a free cooking class this Saturday. Learn new recipes and techniques.",
|
71 |
+
"Reminder: Don't forget to vote in the upcoming election. Your voice matters.",
|
72 |
+
"Join our book club and dive into a world of fascinating stories. Here's how to join.",
|
73 |
+
]
|
74 |
+
|
75 |
+
|
76 |
+
# Create Gradio components
|
77 |
+
input_text = gr.Textbox(
|
78 |
+
lines=3, label="Enter the SMS/Message/Email you received", autofocus=True
|
79 |
+
)
|
80 |
+
output_text = gr.HTML("", label="Output")
|
81 |
+
probabilities_text = gr.Label("", label="Probabilities")
|
82 |
+
|
83 |
+
random.shuffle(examples)
|
84 |
+
|
85 |
+
# Initialize the Gradio interface
|
86 |
+
model_gui = gr.Interface(
|
87 |
+
fn=process_input,
|
88 |
+
inputs=input_text,
|
89 |
+
outputs=[probabilities_text, output_text],
|
90 |
+
title=title,
|
91 |
+
examples=examples,
|
92 |
+
interpretation="default",
|
93 |
+
theme="shivi/calm_seafoam",
|
94 |
+
css="""*{font-family:'IBM Plex Mono';}""",
|
95 |
+
examples_per_page=15,
|
96 |
+
)
|
97 |
+
|
98 |
+
# Launch the Gradio interface
|
99 |
+
print("add '/?__theme=dark' to URL for rendering in dark theme.")
|
100 |
+
model_gui.launch()
|
spam_detection_model/config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"qa_dropout": 0.1,
|
18 |
+
"seq_classif_dropout": 0.2,
|
19 |
+
"sinusoidal_pos_embds": false,
|
20 |
+
"tie_weights_": true,
|
21 |
+
"transformers_version": "4.31.0",
|
22 |
+
"vocab_size": 30522
|
23 |
+
}
|
spam_detection_model/tf_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74b314fc3657a9f17379df8e858949536024f0cd090962752662e0479c948162
|
3 |
+
size 267951808
|