SandraPK committed on
Commit
a4a5dbc
1 Parent(s): e915bc4

Upload 24 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ IMDB[[:space:]]Dataset.csv filter=lfs diff=lfs merge=lfs -text
BackPropogation.py ADDED
@@ -0,0 +1,53 @@
+ import numpy as np
+ from tqdm import tqdm
+
+
+ class BackPropogation:
+     def __init__(self, learning_rate=0.01, epochs=100, activation_function='step'):
+         self.bias = 0
+         self.learning_rate = learning_rate
+         self.max_epochs = epochs
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         # Every activation is thresholded so the output is a hard 0/1 label.
+         if self.activation_function == 'step':
+             return 1 if x >= 0 else 0
+         elif self.activation_function == 'sigmoid':
+             return 1 if (1 / (1 + np.exp(-x))) >= 0.5 else 0
+         elif self.activation_function == 'relu':
+             return 1 if max(0, x) >= 0.5 else 0
+
+     def fit(self, X, y):
+         n_features = X.shape[1]
+         self.weights = np.zeros(n_features)
+         for epoch in tqdm(range(self.max_epochs)):
+             for i in range(len(X)):
+                 inputs = X[i]
+                 target = y[i]
+                 weighted_sum = np.dot(inputs, self.weights) + self.bias
+                 prediction = self.activate(weighted_sum)
+
+                 # Calculate the error and update weights and bias.
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * inputs
+                 self.bias += self.learning_rate * error
+
+             print(f"Updated weights after epoch {epoch}: {self.weights}")
+         print("Training Completed")
+
+     def predict(self, X):
+         predictions = []
+         for i in range(len(X)):
+             inputs = X[i]
+             weighted_sum = np.dot(inputs, self.weights) + self.bias
+             prediction = self.activate(weighted_sum)
+             predictions.append(prediction)
+         return predictions
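A minimal usage sketch (an editorial addition, not part of this commit) showing the class above trained on a toy AND-gate dataset; the class and method names come from BackPropogation.py, the toy data is illustrative:

import numpy as np
from BackPropogation import BackPropogation

# Toy AND-gate dataset: two binary inputs, one binary target.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = np.array([0, 0, 0, 1])

model = BackPropogation(learning_rate=0.1, epochs=20, activation_function='step')
model.fit(X, y)
print(model.predict(X))  # expected after convergence: [0, 0, 0, 1]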
DNN_IMDB.py ADDED
@@ -0,0 +1,68 @@
+ import pandas as pd
+ import numpy as np
+ from tensorflow.keras import Sequential
+ from tensorflow.keras.layers import Dense, Embedding, Flatten
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ import pickle
+
+
+ # Load the IMDB dataset from a CSV file
+ path_to_csv = 'IMDB Dataset.csv'
+ df = pd.read_csv(path_to_csv)
+
+ reviews = df['review'].values
+ labels = df['sentiment'].values
+
+ # Convert string labels to numerical values
+ label_encoder = {'positive': 1, 'negative': 0}
+ y = np.array([label_encoder[label.lower()] for label in labels])
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(reviews)
+ sequences = tokenizer.texts_to_sequences(reviews)
+
+ # Pad sequences to a fixed length
+ max_review_length = 200
+ x = pad_sequences(sequences, maxlen=max_review_length)
+
+ # Model building
+ model = Sequential()
+ model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_review_length))
+ model.add(Flatten())
+ model.add(Dense(64, activation='relu'))
+ model.add(Dense(1, activation='sigmoid'))
+
+ model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+ model.summary()
+
+ # Training
+ print("Training started...")
+ history = model.fit(x, y, epochs=3, batch_size=16, validation_split=0.2)
+ # Note: this evaluates on the full dataset the model was trained on,
+ # not a held-out test set.
+ loss, acc = model.evaluate(x, y)
+ print("Training finished.")
+ print(f'Accuracy on the full dataset: {round(acc*100)}%')
+
+
+ # Save the tokenizer
+ with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
+
+
+ # Save the model
+ model.save('dnn_model_imdb.h5')
+ print("Model saved as 'dnn_model_imdb.h5'")
+
+
+ # Example: Make a prediction on a movie review
+ sample_review = "I really enjoyed the movie. The plot was engaging, and the acting was superb."
+ sample_sequence = tokenizer.texts_to_sequences([sample_review])
+ padded_sample = pad_sequences(sample_sequence, maxlen=max_review_length)
+ prediction = model.predict(padded_sample)
+ # 0.5 is the conventional decision threshold for a sigmoid output.
+ sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
+ print(f'Predicted Sentiment: {sentiment} (Probability: {prediction[0][0]:.2f})')
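A minimal inference sketch (an editorial addition, not part of this commit), assuming the artifacts saved above are on disk; note that Spam_dnn.py below also writes a file named tokenizer_dnn.pkl, so whichever script ran last determines that pickle's contents:

import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload the model and tokenizer saved by DNN_IMDB.py
model = load_model('dnn_model_imdb.h5')
with open('tokenizer_dnn.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Encode, pad to the 200-token training length, and score one review
seq = tokenizer.texts_to_sequences(["A dull, plodding film with no redeeming qualities."])
prob = model.predict(pad_sequences(seq, maxlen=200))[0][0]
print("Positive" if prob > 0.5 else "Negative", f"({prob:.2f})")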
IMDB Dataset.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dfc447764f82be365fa9c2beef4e8df89d3919e3da95f5088004797d79695aa2
+ size 66212309
Perceptron.py ADDED
@@ -0,0 +1,48 @@
+ import numpy as np
+ from tqdm import tqdm
+
+
+ class Perceptron:
+
+     def __init__(self, learning_rate=0.01, epochs=100, activation_function='step'):
+         self.bias = 0
+         self.learning_rate = learning_rate
+         self.max_epochs = epochs
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         # Every activation is thresholded so the output is a hard 0/1 label.
+         if self.activation_function == 'step':
+             return 1 if x >= 0 else 0
+         elif self.activation_function == 'sigmoid':
+             return 1 if (1 / (1 + np.exp(-x))) >= 0.5 else 0
+         elif self.activation_function == 'relu':
+             return 1 if max(0, x) >= 0.5 else 0
+
+     def fit(self, X, y):
+         n_features = X.shape[1]
+         self.weights = np.zeros(n_features)
+         for epoch in tqdm(range(self.max_epochs)):
+             for i in range(len(X)):
+                 inputs = X[i]
+                 target = y[i]
+                 weighted_sum = np.dot(inputs, self.weights) + self.bias
+                 prediction = self.activate(weighted_sum)
+
+                 # Perceptron learning rule: adjust weights and bias
+                 # in proportion to the prediction error.
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * inputs
+                 self.bias += self.learning_rate * error
+         print("Training Completed")
+
+     def predict(self, X):
+         predictions = []
+         for i in range(len(X)):
+             inputs = X[i]
+             weighted_sum = np.dot(inputs, self.weights) + self.bias
+             prediction = self.activate(weighted_sum)
+             predictions.append(prediction)
+         return predictions
SMSSpamCollection.txt ADDED
The diff for this file is too large to render. See raw diff
 
Spam_dnn.py ADDED
@@ -0,0 +1,82 @@
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Embedding, Flatten, Dense
+ import pickle
+
+ # Load the dataset
+ txt_file_path = 'SMSSpamCollection.txt'
+
+ # Initialize empty lists to store labels and messages
+ labels = []
+ messages = []
+
+ # Read the tab-separated file line by line and extract labels and messages
+ try:
+     with open(txt_file_path, 'r', encoding='utf-8') as file:
+         for line in file:
+             parts = line.strip().split('\t')
+             if len(parts) == 2:
+                 label, message = parts
+                 labels.append(label)
+                 messages.append(message)
+
+     # Create a DataFrame from the lists and check that the data loaded
+     dataset = pd.DataFrame({'label': labels, 'message': messages})
+     print(dataset.head())
+ except Exception as e:
+     print(f"Error reading text file: {e}")
+     raise
+
+ # The dataset has 'label' ('spam'/'ham') and 'message' columns
+ X = dataset['message'].values
+ y = dataset['label'].map({'spam': 1, 'ham': 0}).values
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+ # Build the DNN model
+ model = Sequential()
+ model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_sequence_length))
+ model.add(Flatten())
+ model.add(Dense(64, activation='relu'))
+ model.add(Dense(1, activation='sigmoid'))
+
+ # Compile the model
+ model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+ # Train the model
+ model.fit(X_train_padded, y_train, epochs=5, batch_size=32, validation_split=0.2)
+
+ # Evaluate the model on the test set
+ y_pred = (model.predict(X_test_padded) > 0.5).astype("int32").ravel()
+
+ # Print classification report and accuracy
+ print("Classification Report:")
+ print(classification_report(y_test, y_pred))
+ print("Confusion Matrix:")
+ print(confusion_matrix(y_test, y_pred))
+ print("Accuracy:", accuracy_score(y_test, y_pred))
+
+ # Save the model
+ model.save('spam_dnn_model.h5')
+
+ # Save the tokenizer, keeping only the top max_words entries of word_index
+ # to shrink the pickle (texts_to_sequences already respects num_words)
+ with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
+     tokenizer.word_index = {e: i for e, i in tokenizer.word_index.items() if i <= max_words}
+     pickle.dump(tokenizer, tokenizer_file)
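A matching inference sketch (an editorial addition, not part of this commit) that mirrors how app.py below reloads these artifacts; the file names and the post-padded length of 200 come from the script above:

import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload the spam model and tokenizer saved by Spam_dnn.py
model = load_model('spam_dnn_model.h5')
with open('tokenizer_dnn.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Encode, pad, and score one message
seq = tokenizer.texts_to_sequences(["Free entry! Text WIN now to claim your prize"])
prob = model.predict(pad_sequences(seq, maxlen=200, padding='post'))[0][0]
print("Spam" if prob > 0.5 else "Ham", f"({prob:.2f})")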
app.py ADDED
@@ -0,0 +1,197 @@
+ # Importing necessary libraries
+ import streamlit as st
+ import numpy as np
+ from PIL import Image
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.models import load_model
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.applications.inception_v3 import preprocess_input
+ import tensorflow as tf
+ import pickle
+ from tensorflow.keras.preprocessing import sequence
+
+
+ # Load the saved tokenizers using pickle
+ with open('tokenizer_rnn.pkl', 'rb') as handle:
+     tokenizer_rnn = pickle.load(handle)
+
+ with open('tokenizer_dnn.pkl', 'rb') as handle:
+     tokenizer_dnn = pickle.load(handle)
+
+ with open('tokenizer_per.pkl', 'rb') as handle:
+     tokenizer_per = pickle.load(handle)
+
+ with open('tokenizer_backpropagation.pkl', 'rb') as handle:
+     tokenizer_back = pickle.load(handle)
+
+ # Load saved models
+ image_model = load_model('tumor_detection_model.h5')
+ # dnn_model = tf.keras.models.load_model('dnn_model_imdb.h5')
+ loaded_model = tf.keras.models.load_model('spam_model.h5')
+ lstm_model = tf.keras.models.load_model('lstm_model.h5')
+ dnn_model = tf.keras.models.load_model('spam_dnn_model.h5')
+
+ with open('spam_perceptron_model.pkl', 'rb') as model_file:
+     loaded_perceptron = pickle.load(model_file)
+
+ with open('spam_backpropagation_model.pkl', 'rb') as model_file:
+     lbackprop_model = pickle.load(model_file)
+
+
+ # Streamlit app
+ st.title("Classification App")
+
+ # Sidebar
+ task = st.sidebar.selectbox("Select Task", ["Tumor Detection", "Sentiment Classification"])
+
+
+ def predict_dnn(text_input):
+     # Encode with the saved tokenizer and pad to the 200-token training length
+     encoded_input = tokenizer_dnn.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = dnn_model.predict(padded_input)
+     prediction_value = prediction[0]
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_lstm(text_input):
+     words = 5000
+     max_review_length = 500
+     word_index = imdb.get_word_index()
+     text_input = text_input.lower().split()
+     # imdb.load_data reserves indices 0-2 (padding/start/OOV) and shifts every
+     # word_index value by 3, so the same shift is applied here; unknown or
+     # out-of-vocabulary words map to the OOV index 2.
+     text_input = [word_index[word] + 3 if word in word_index and word_index[word] + 3 < words else 2 for word in text_input]
+     text_input = sequence.pad_sequences([text_input], maxlen=max_review_length)
+     prediction = lstm_model.predict(text_input)
+     print("Raw Prediction:", prediction)
+     if prediction > 0.5:
+         return "Positive"
+     else:
+         return "Negative"
+
+
+ def predict_rnn(input_text):
+     encoded_input = tokenizer_rnn.texts_to_sequences([input_text])
+     padded_input = tf.keras.preprocessing.sequence.pad_sequences(encoded_input, maxlen=10, padding='post')
+     prediction = loaded_model.predict(padded_input)
+     if prediction > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_perceptron(text_input):
+     encoded_input = tokenizer_per.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = loaded_perceptron.predict(padded_input)
+     prediction_value = prediction[0]
+
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_backpropogation(text_input):
+     encoded_input = tokenizer_back.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = lbackprop_model.predict(padded_input)
+     prediction_value = prediction[0]
+
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ # Make a prediction with the CNN
+ def make_prediction_cnn(image, image_model):
+     # The tumor-detection model expects 128x128 RGB input
+     img = image.resize((128, 128))
+     img_array = np.array(img)
+     img_array = img_array.reshape((1, img_array.shape[0], img_array.shape[1], img_array.shape[2]))
+
+     preprocessed_image = preprocess_input(img_array)
+     prediction = image_model.predict(preprocessed_image)
+
+     if prediction > 0.5:
+         st.write("Tumor Detected")
+     else:
+         st.write("No Tumor")
+
+
+ if task == "Sentiment Classification":
+     st.subheader("Choose Model")
+     model_choice = st.radio("Select Model", ["DNN (Email)", "RNN (Email)", "Perceptron (Email)", "Backpropagation (Email)", "LSTM (Movie_Review)"])
+
+     st.subheader("Text Input")
+     text_input = st.text_area("Enter Text")
+
+     if st.button("Predict"):
+         if model_choice == "DNN (Email)":
+             if text_input:
+                 prediction_result = predict_dnn(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "RNN (Email)":
+             if text_input:
+                 prediction_result = predict_rnn(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "LSTM (Movie_Review)":
+             if text_input:
+                 prediction_result = predict_lstm(text_input)
+                 st.write(f"The sentiment is: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "Perceptron (Email)":
+             if text_input:
+                 prediction_result = predict_perceptron(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "Backpropagation (Email)":
+             if text_input:
+                 prediction_result = predict_backpropogation(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+
+ else:
+     st.subheader("Choose Model")
+     model_choice = st.radio("Select Model", ["CNN"])
+
+     st.subheader("Image Input")
+     image_input = st.file_uploader("Choose an image...", type="jpg")
+
+     if image_input is not None:
+         image = Image.open(image_input)
+         st.image(image, caption="Uploaded Image.", use_column_width=True)
+
+         if st.button("Predict"):
+             if model_choice == "CNN":
+                 make_prediction_cnn(image, image_model)
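To try the interface locally (assuming the .h5 and .pkl artifacts above sit in the working directory alongside BackPropogation.py and Perceptron.py, whose class definitions the pickled models need at load time), run `streamlit run app.py`.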
backprop_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3fab088872c073d72e358cb47f7c881045fd816657901ab1cae79d3e0bb98782
+ size 309
dnn_model_imdb.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a78ea55e7152f4d35222df74913180e8a04787fcef706410699ed55183e062b
+ size 17542648
lstm-code.py ADDED
@@ -0,0 +1,38 @@
+ # LSTM for sequence classification on the IMDB dataset
+ import tensorflow as tf
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense, LSTM, Embedding
+ from tensorflow.keras.preprocessing import sequence
+
+
+ # Fix the random seed for reproducibility
+ tf.random.set_seed(7)
+
+ # Load the dataset, keeping only the top n words
+ # (rarer words map to the out-of-vocabulary index)
+ top_words = 5000
+ (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
+
+ # Truncate and pad input sequences to a fixed length
+ max_review_length = 500
+ X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
+ X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
+
+ # Create the model
+ embedding_vector_length = 32
+ model = Sequential()
+ model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
+ model.add(LSTM(100))
+ model.add(Dense(1, activation='sigmoid'))
+ model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+ print(model.summary())
+ model.fit(X_train, y_train, epochs=3, batch_size=64)
+
+ # Final evaluation of the model
+ scores = model.evaluate(X_test, y_test, verbose=0)
+ print("Accuracy: %.2f%%" % (scores[1] * 100))
+
+
+ # Save the model
+ model.save('lstm_model.h5')
+ print("Model saved as 'lstm_model.h5'")
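A sketch (an editorial addition, not part of this commit) of scoring raw text with the saved model. The +3 shift is an assumption based on imdb.load_data's default index_from=3, which reserves indices 0-2 for padding, sequence start, and out-of-vocabulary words:

from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import sequence

model = load_model('lstm_model.h5')
word_index = imdb.get_word_index()

def encode(text, top_words=5000, maxlen=500):
    # Shift raw word_index values by 3 to match the training data's indexing;
    # unknown or out-of-range words fall back to the OOV index 2.
    ids = [word_index.get(w, -1) + 3 for w in text.lower().split()]
    ids = [i if 2 < i < top_words else 2 for i in ids]
    return sequence.pad_sequences([ids], maxlen=maxlen)

prob = model.predict(encode("a wonderful and moving film"))[0][0]
print("Positive" if prob > 0.5 else "Negative", f"({prob:.2f})")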
lstm_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1041af1fb0041169446751439f196b57baf0a41f7857c8d3f8db92a70d3177a2
+ size 2594296
perceptron_code.py ADDED
@@ -0,0 +1,85 @@
+ import numpy as np
+ from sklearn.metrics import accuracy_score, classification_report
+ import pickle
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.preprocessing import sequence
+
+
+ class Perceptron:
+     def __init__(self, input_size, epochs=100, learning_rate=0.01, activation_function='sigmoid'):
+         self.weights = np.zeros(input_size)  # the bias is kept as a separate scalar
+         self.bias = 0
+         self.epochs = epochs
+         self.learning_rate = learning_rate
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         if self.activation_function == 'sigmoid':
+             return 1 / (1 + np.exp(-x))
+         elif self.activation_function == 'step':
+             return np.where(x >= 0, 1, 0)
+         else:
+             raise ValueError(f"Unsupported activation function: {self.activation_function}")
+
+     def fit(self, X, y):
+         for epoch in range(self.epochs):
+             for xi, target in zip(X, y):
+                 prediction = self.activate(np.dot(xi, self.weights) + self.bias)
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * xi
+                 self.bias += self.learning_rate * error
+
+     def predict(self, X):
+         weighted_sum = np.dot(X, self.weights) + self.bias
+         return self.activate(weighted_sum)
+
+
+ def save_model(perceptron):
+     with open('perceptron_model.pkl', 'wb') as model_file:
+         pickle.dump(perceptron, model_file)
+
+
+ # Load the IMDB dataset
+ top_words = 5000
+ (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
+
+ # Labels are already binary (0 = negative, 1 = positive)
+ y_train = np.array(y_train)
+ y_test = np.array(y_test)
+
+ # Truncate and pad input sequences to a fixed length
+ max_review_length = 500
+ X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
+ X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
+
+ # Create and train the Perceptron
+ input_size = X_train.shape[1]
+ learning_rate = 0.01
+ perceptron = Perceptron(input_size=input_size, epochs=10, learning_rate=learning_rate)
+ perceptron.fit(X_train, y_train)
+
+ # Save the trained model
+ save_model(perceptron)
+
+ # Make predictions (the sigmoid activation returns probabilities)
+ pred = perceptron.predict(X_test)
+
+ # Threshold the probabilities to get binary predictions
+ threshold = 0.5
+ binary_predictions = (pred > threshold).astype(int)
+
+ # Now use binary_predictions for evaluation
+ print(f"Accuracy: {accuracy_score(y_test, binary_predictions)}")
+ report = classification_report(y_test, binary_predictions, digits=2)
+ print(report)
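A small reload sketch (an editorial addition, not part of this commit); because the model was pickled from this script, the Perceptron class above must be defined or importable in the process that unpickles it:

import pickle
import numpy as np

# Restore the trained perceptron (requires the Perceptron class definition)
with open('perceptron_model.pkl', 'rb') as f:
    clf = pickle.load(f)

sample = np.zeros((1, 500))  # one padded review of length 500 (all padding)
prob = clf.predict(sample)   # sigmoid activation returns a probability
print((prob > 0.5).astype(int))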
perceptron_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74426e092081537f0925df943428ca71166d55e055a2023821b26d37938fd42a
+ size 4300
spam_back.py ADDED
@@ -0,0 +1,73 @@
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, accuracy_score
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from BackPropogation import BackPropogation
+ from sklearn.preprocessing import LabelEncoder
+ import pickle
+
+ # Load the SMS Spam Collection dataset
+ sms_dataset_path = 'SMSSpamCollection.txt'
+ sms_data = []
+ sms_labels = []
+
+ with open(sms_dataset_path, 'r', encoding='utf-8') as file:
+     for line in file:
+         parts = line.strip().split('\t')
+         if len(parts) == 2:
+             label, message = parts
+             sms_labels.append(label)
+             sms_data.append(message)
+
+
+ # Use LabelEncoder to encode 'spam' and 'ham' into numerical values
+ label_encoder = LabelEncoder()
+ sms_labels = label_encoder.fit_transform(sms_labels)
+
+ # The BackPropogation class does not take input_size at initialization
+ backpropagation = BackPropogation(learning_rate=0.01, epochs=5)
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(sms_data, sms_labels, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length (pad_sequences already returns a 2-D array)
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+ # Train the Backpropagation model
+ backpropagation.fit(X_train_padded, y_train)
+
+ # Make predictions on the test set
+ predictions = backpropagation.predict(X_test_padded)
+
+ # Evaluate and print results
+ print("Backpropagation Classification Report:")
+ print(classification_report(y_test, predictions))
+ print("Backpropagation Accuracy:", accuracy_score(y_test, predictions))
+
+
+ # Save the trained Backpropagation model using pickle
+ backpropagation_model_path = 'spam_backpropagation_model.pkl'
+ with open(backpropagation_model_path, 'wb') as model_file:
+     pickle.dump(backpropagation, model_file)
+
+ # Save the tokenizer using pickle
+ tokenizer_path = 'tokenizer_backpropagation.pkl'
+ with open(tokenizer_path, 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
spam_backpropagation_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7eb020c3d330de0ff75a778302b6c21740487d106040d0a5190a7c4ac5f5902
+ size 1896
spam_dnn_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:523a4bef4281cf072886092b0e36f29e553fc2220bb554ed1501e2d8ed718103
+ size 17542648
spam_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8685b0df3fbe649818311e74aefef8062f3dc503661fbd243687969e1544f712
+ size 2269016
spam_perceptron.py ADDED
@@ -0,0 +1,70 @@
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, accuracy_score
+ from sklearn.preprocessing import LabelEncoder
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from Perceptron import Perceptron
+ import pickle
+
+ # Load the SMS Spam Collection dataset
+ sms_dataset_path = 'SMSSpamCollection.txt'
+ sms_data = []
+ sms_labels = []
+
+ with open(sms_dataset_path, 'r', encoding='utf-8') as file:
+     for line in file:
+         parts = line.strip().split('\t')
+         if len(parts) == 2:
+             label, message = parts
+             sms_labels.append(label)
+             sms_data.append(message)
+
+ # Encode 'spam' and 'ham' into numerical values
+ label_encoder = LabelEncoder()
+ sms_labels = label_encoder.fit_transform(sms_labels)
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(sms_data, sms_labels, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+
+ # Create and train the Perceptron using the Perceptron class above
+ perceptron = Perceptron(learning_rate=0.01, epochs=100, activation_function='step')
+ perceptron.fit(X_train_padded, y_train)
+
+ # Make predictions on the test set
+ predictions = perceptron.predict(X_test_padded)
+
+ # Evaluate and print results
+ print("Perceptron Classification Report:")
+ print(classification_report(y_test, predictions))
+ print("Perceptron Accuracy:", accuracy_score(y_test, predictions))
+
+
+ # Save the trained Perceptron model using pickle
+ perceptron_model_path = 'spam_perceptron_model.pkl'
+ with open(perceptron_model_path, 'wb') as model_file:
+     pickle.dump(perceptron, model_file)
+
+ # Save the tokenizer using pickle
+ tokenizer_path = 'tokenizer_per.pkl'
+ with open(tokenizer_path, 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
spam_perceptron_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33f1f15d4fb4fe21547d66bfaf1d2d0a82228630743b5c478e1ef28e9ed763c7
+ size 1063
tokenizer_backpropagation.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:521eb792bc61700632c9c1a0fe64c5361ae455394e4bb1707f4e599ac8d6407d
+ size 309811
tokenizer_dnn.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:abe0e18829be8b6a19fd30f5335eab0a994003e322a90d9b10a30699e5b8ae6b
+ size 309811
tokenizer_per.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81ad96b864dc27f70878838b38ada5342581533fa5279531a75a2d5d56ac7041
+ size 309811
tokenizer_rnn.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1acba054c3040886e224a36ca905a2121a85bbf65a9aa52f2707227829480bdc
+ size 290462
tumor_detection_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95e3cd70401c053d4f32ca737a74f097d3877e4d4244480c230a6b43c7e4eba0
+ size 391811360