Pravincoder committed on
Commit
0f8ae82
1 Parent(s): 8478b3b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Imports
2
+ import tensorflow as tf
3
+ import numpy as np
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from tensorflow.keras.preprocessing.text import Tokenizer
7
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
8
+ import gradio
9
+
10
## Load Data
# The CSV is read from a fixed absolute path; it must provide the 'Category'
# and 'Message' columns used below.
dataset = pd.read_csv('/content/drive/MyDrive/SPAMtextmessage.csv')

## Data Preprocessing
# Encode the Category column as integers: 'ham' becomes 0 and 'spam' becomes 1.
dataset['Category'] = (
    dataset['Category']
    .str.replace('ham', '0')
    .str.replace('spam', '1')
    .astype(int)
)
sentences = dataset['Message'].tolist()
labels = dataset['Category'].tolist()

# The first 80% of rows form the training set and the remainder the test set.
# The split is positional: rows are not shuffled first.
training_size = int(len(sentences) * 0.8)
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

# Labels become numpy arrays so they can be passed to the network later.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)
31
+
32
## Text Preprocessing
# Hyper-parameters shared by the tokenizer, the padding step and the model.
vocab_size = 1000
embedding_dim = 16
max_length = 100
padding_type = 'post'
trunc_type = 'post'
# NOTE(review): an empty-string OOV token is unusual; a marker such as "<OOV>"
# may have been stripped when this page was rendered - confirm against the repo.
oov_tok = ""

# The vocabulary is fitted on the training sentences only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Training sentences -> integer sequences -> arrays of width max_length.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# The test sentences go through the same tokenizer and padding settings.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
48
+
49
## Modeling
# Embedding -> Flatten -> two ReLU hidden layers -> one sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(20, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Adam with lr = 0.01, binary cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 50 epochs in batches of 128; the test split is used as validation data.
model.fit(
    padded,
    training_labels_final,
    batch_size=128,
    epochs=50,
    validation_data=(testing_padded, testing_labels_final),
)
63
+
64
+ ## Gradio App
65
def spam_detection(message):
    """Classify one text message with the trained model.

    The message is tokenized with the module-level ``tokenizer`` and padded
    with the same settings used for the training data, then scored by
    ``model``.

    Args:
        message: The message text to classify.

    Returns:
        "Spam" when the model's output is at least 0.5, otherwise "Not Spam".
    """
    # Wrap the message in a list: the tokenizer works on a batch of texts.
    token_ids = tokenizer.texts_to_sequences([message])
    model_input = pad_sequences(
        token_ids,
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )

    # predict() returns a 2-D array; [0, 0] picks the single score for this message.
    spam_score = model.predict(model_input)[0, 0]

    if spam_score >= 0.5:
        return "Spam"
    return "Not Spam"
75
+
76
# Gradio Interface
# The file-level import is a bare `import gradio`, which does not define the
# `gr` alias used below, so bind the alias here to avoid a NameError.
import gradio as gr

iface = gr.Interface(
    fn=spam_detection,
    # `prompt` is not a documented Textbox parameter; the caption shown to the
    # user is passed as `label` instead.
    inputs=gr.Textbox(label="Enter a message:"),
    outputs="text",
    live=True,  # re-run the classifier whenever the input changes
    theme="huggingface",
    title="Spam Message Detection",
    description="A demo app for learning purposes. Detects spam messages with 98% accuracy based on the dataset.",
)

# Launch the app
iface.launch()