SandraPK committed on
Commit
a4a5dbc
1 Parent(s): e915bc4

Upload 24 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ IMDB[[:space:]]Dataset.csv filter=lfs diff=lfs merge=lfs -text
BackPropogation.py ADDED
@@ -0,0 +1,53 @@
+ import numpy as np
+ from tqdm import tqdm
+
+
+ class BackPropogation:
+     def __init__(self, learning_rate=0.01, epochs=100, activation_function='step'):
+         self.bias = 0
+         self.learning_rate = learning_rate
+         self.max_epochs = epochs
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         # Every activation is thresholded so the output is a hard 0/1 label.
+         if self.activation_function == 'step':
+             return 1 if x >= 0 else 0
+         elif self.activation_function == 'sigmoid':
+             return 1 if (1 / (1 + np.exp(-x))) >= 0.5 else 0
+         elif self.activation_function == 'relu':
+             return 1 if max(0, x) >= 0.5 else 0
+
+     def fit(self, X, y):
+         n_features = X.shape[1]
+         self.weights = np.zeros(n_features)
+         for epoch in tqdm(range(self.max_epochs)):
+             for i in range(len(X)):
+                 inputs = X[i]
+                 target = y[i]
+                 weighted_sum = np.dot(inputs, self.weights) + self.bias
+                 prediction = self.activate(weighted_sum)
+
+                 # Calculate the error and update weights and bias.
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * inputs
+                 self.bias += self.learning_rate * error
+
+             print(f"Updated weights after epoch {epoch}: {self.weights}")
+         print("Training Completed")
+
+     def predict(self, X):
+         predictions = []
+         for i in range(len(X)):
+             inputs = X[i]
+             weighted_sum = np.dot(inputs, self.weights) + self.bias
+             prediction = self.activate(weighted_sum)
+             predictions.append(prediction)
+         return predictions
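A minimal usage sketch (an editorial addition, not part of this commit) showing the class above trained on a toy AND-gate dataset; the class and method names come from BackPropogation.py, the toy data is illustrative:

import numpy as np
from BackPropogation import BackPropogation

# Toy AND-gate dataset: two binary inputs, one binary target.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = np.array([0, 0, 0, 1])

model = BackPropogation(learning_rate=0.1, epochs=20, activation_function='step')
model.fit(X, y)
print(model.predict(X))  # expected after convergence: [0, 0, 0, 1]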
DNN_IMDB.py ADDED
@@ -0,0 +1,68 @@
+ import pandas as pd
+ import numpy as np
+ from tensorflow.keras import Sequential
+ from tensorflow.keras.layers import Dense, Embedding, Flatten
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ import pickle
+
+
+ # Load the IMDB dataset from a CSV file
+ path_to_csv = 'IMDB Dataset.csv'
+ df = pd.read_csv(path_to_csv)
+
+ reviews = df['review'].values
+ labels = df['sentiment'].values
+
+ # Convert string labels to numerical values
+ label_encoder = {'positive': 1, 'negative': 0}
+ y = np.array([label_encoder[label.lower()] for label in labels])
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(reviews)
+ sequences = tokenizer.texts_to_sequences(reviews)
+
+ # Pad sequences to a fixed length
+ max_review_length = 200
+ x = pad_sequences(sequences, maxlen=max_review_length)
+
+ # Model building
+ model = Sequential()
+ model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_review_length))
+ model.add(Flatten())
+ model.add(Dense(64, activation='relu'))
+ model.add(Dense(1, activation='sigmoid'))
+
+ model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+ model.summary()
+
+ # Training
+ print("Training started...")
+ history = model.fit(x, y, epochs=3, batch_size=16, validation_split=0.2)
+ # Note: this evaluates on the full dataset the model was trained on,
+ # not a held-out test set.
+ loss, acc = model.evaluate(x, y)
+ print("Training finished.")
+ print(f'Accuracy on the full dataset: {round(acc*100)}%')
+
+
+ # Save the tokenizer
+ with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
+
+
+ # Save the model
+ model.save('dnn_model_imdb.h5')
+ print("Model saved as 'dnn_model_imdb.h5'")
+
+
+ # Example: Make a prediction on a movie review
+ sample_review = "I really enjoyed the movie. The plot was engaging, and the acting was superb."
+ sample_sequence = tokenizer.texts_to_sequences([sample_review])
+ padded_sample = pad_sequences(sample_sequence, maxlen=max_review_length)
+ prediction = model.predict(padded_sample)
+ # 0.5 is the conventional decision threshold for a sigmoid output.
+ sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
+ print(f'Predicted Sentiment: {sentiment} (Probability: {prediction[0][0]:.2f})')
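A minimal inference sketch (an editorial addition, not part of this commit), assuming the artifacts saved above are on disk; note that Spam_dnn.py below also writes a file named tokenizer_dnn.pkl, so whichever script ran last determines that pickle's contents:

import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload the model and tokenizer saved by DNN_IMDB.py
model = load_model('dnn_model_imdb.h5')
with open('tokenizer_dnn.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Encode, pad to the 200-token training length, and score one review
seq = tokenizer.texts_to_sequences(["A dull, plodding film with no redeeming qualities."])
prob = model.predict(pad_sequences(seq, maxlen=200))[0][0]
print("Positive" if prob > 0.5 else "Negative", f"({prob:.2f})")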
IMDB Dataset.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dfc447764f82be365fa9c2beef4e8df89d3919e3da95f5088004797d79695aa2
+ size 66212309
Perceptron.py ADDED
@@ -0,0 +1,48 @@
+ import numpy as np
+ from tqdm import tqdm
+
+
+ class Perceptron:
+
+     def __init__(self, learning_rate=0.01, epochs=100, activation_function='step'):
+         self.bias = 0
+         self.learning_rate = learning_rate
+         self.max_epochs = epochs
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         # Every activation is thresholded so the output is a hard 0/1 label.
+         if self.activation_function == 'step':
+             return 1 if x >= 0 else 0
+         elif self.activation_function == 'sigmoid':
+             return 1 if (1 / (1 + np.exp(-x))) >= 0.5 else 0
+         elif self.activation_function == 'relu':
+             return 1 if max(0, x) >= 0.5 else 0
+
+     def fit(self, X, y):
+         n_features = X.shape[1]
+         self.weights = np.zeros(n_features)
+         for epoch in tqdm(range(self.max_epochs)):
+             for i in range(len(X)):
+                 inputs = X[i]
+                 target = y[i]
+                 weighted_sum = np.dot(inputs, self.weights) + self.bias
+                 prediction = self.activate(weighted_sum)
+
+                 # Perceptron learning rule: adjust weights and bias
+                 # in proportion to the prediction error.
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * inputs
+                 self.bias += self.learning_rate * error
+         print("Training Completed")
+
+     def predict(self, X):
+         predictions = []
+         for i in range(len(X)):
+             inputs = X[i]
+             weighted_sum = np.dot(inputs, self.weights) + self.bias
+             prediction = self.activate(weighted_sum)
+             predictions.append(prediction)
+         return predictions
SMSSpamCollection.txt ADDED
The diff for this file is too large to render. See raw diff
 
Spam_dnn.py ADDED
@@ -0,0 +1,82 @@
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Embedding, Flatten, Dense
+ import pickle
+
+ # Load the dataset
+ txt_file_path = 'SMSSpamCollection.txt'
+
+ # Initialize empty lists to store labels and messages
+ labels = []
+ messages = []
+
+ # Read the tab-separated file line by line and extract labels and messages
+ try:
+     with open(txt_file_path, 'r', encoding='utf-8') as file:
+         for line in file:
+             parts = line.strip().split('\t')
+             if len(parts) == 2:
+                 label, message = parts
+                 labels.append(label)
+                 messages.append(message)
+
+     # Create a DataFrame from the lists and check that the data loaded
+     dataset = pd.DataFrame({'label': labels, 'message': messages})
+     print(dataset.head())
+ except Exception as e:
+     print(f"Error reading text file: {e}")
+     raise
+
+ # The dataset has 'label' ('spam'/'ham') and 'message' columns
+ X = dataset['message'].values
+ y = dataset['label'].map({'spam': 1, 'ham': 0}).values
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+ # Build the DNN model
+ model = Sequential()
+ model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_sequence_length))
+ model.add(Flatten())
+ model.add(Dense(64, activation='relu'))
+ model.add(Dense(1, activation='sigmoid'))
+
+ # Compile the model
+ model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+ # Train the model
+ model.fit(X_train_padded, y_train, epochs=5, batch_size=32, validation_split=0.2)
+
+ # Evaluate the model on the test set
+ y_pred = (model.predict(X_test_padded) > 0.5).astype("int32").ravel()
+
+ # Print classification report and accuracy
+ print("Classification Report:")
+ print(classification_report(y_test, y_pred))
+ print("Confusion Matrix:")
+ print(confusion_matrix(y_test, y_pred))
+ print("Accuracy:", accuracy_score(y_test, y_pred))
+
+ # Save the model
+ model.save('spam_dnn_model.h5')
+
+ # Save the tokenizer, keeping only the top max_words entries of word_index
+ # to shrink the pickle (texts_to_sequences already respects num_words)
+ with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
+     tokenizer.word_index = {e: i for e, i in tokenizer.word_index.items() if i <= max_words}
+     pickle.dump(tokenizer, tokenizer_file)
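A matching inference sketch (an editorial addition, not part of this commit) that mirrors how app.py below reloads these artifacts; the file names and the post-padded length of 200 come from the script above:

import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload the spam model and tokenizer saved by Spam_dnn.py
model = load_model('spam_dnn_model.h5')
with open('tokenizer_dnn.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Encode, pad, and score one message
seq = tokenizer.texts_to_sequences(["Free entry! Text WIN now to claim your prize"])
prob = model.predict(pad_sequences(seq, maxlen=200, padding='post'))[0][0]
print("Spam" if prob > 0.5 else "Ham", f"({prob:.2f})")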
app.py ADDED
@@ -0,0 +1,197 @@
+ # Importing necessary libraries
+ import streamlit as st
+ import numpy as np
+ from PIL import Image
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.models import load_model
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.applications.inception_v3 import preprocess_input
+ import tensorflow as tf
+ import pickle
+ from tensorflow.keras.preprocessing import sequence
+
+
+ # Load the saved tokenizers using pickle
+ with open('tokenizer_rnn.pkl', 'rb') as handle:
+     tokenizer_rnn = pickle.load(handle)
+
+ with open('tokenizer_dnn.pkl', 'rb') as handle:
+     tokenizer_dnn = pickle.load(handle)
+
+ with open('tokenizer_per.pkl', 'rb') as handle:
+     tokenizer_per = pickle.load(handle)
+
+ with open('tokenizer_backpropagation.pkl', 'rb') as handle:
+     tokenizer_back = pickle.load(handle)
+
+ # Load saved models
+ image_model = load_model('tumor_detection_model.h5')
+ # dnn_model = tf.keras.models.load_model('dnn_model_imdb.h5')
+ loaded_model = tf.keras.models.load_model('spam_model.h5')
+ lstm_model = tf.keras.models.load_model('lstm_model.h5')
+ dnn_model = tf.keras.models.load_model('spam_dnn_model.h5')
+
+ with open('spam_perceptron_model.pkl', 'rb') as model_file:
+     loaded_perceptron = pickle.load(model_file)
+
+ with open('spam_backpropagation_model.pkl', 'rb') as model_file:
+     lbackprop_model = pickle.load(model_file)
+
+
+ # Streamlit app
+ st.title("Classification App")
+
+ # Sidebar
+ task = st.sidebar.selectbox("Select Task", ["Tumor Detection", "Sentiment Classification"])
+
+
+ def predict_dnn(text_input):
+     # Encode with the saved tokenizer and pad to the 200-token training length
+     encoded_input = tokenizer_dnn.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = dnn_model.predict(padded_input)
+     prediction_value = prediction[0]
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_lstm(text_input):
+     words = 5000
+     max_review_length = 500
+     word_index = imdb.get_word_index()
+     text_input = text_input.lower().split()
+     # imdb.load_data reserves indices 0-2 (padding/start/OOV) and shifts every
+     # word_index value by 3, so the same shift is applied here; unknown or
+     # out-of-vocabulary words map to the OOV index 2.
+     text_input = [word_index[word] + 3 if word in word_index and word_index[word] + 3 < words else 2 for word in text_input]
+     text_input = sequence.pad_sequences([text_input], maxlen=max_review_length)
+     prediction = lstm_model.predict(text_input)
+     print("Raw Prediction:", prediction)
+     if prediction > 0.5:
+         return "Positive"
+     else:
+         return "Negative"
+
+
+ def predict_rnn(input_text):
+     encoded_input = tokenizer_rnn.texts_to_sequences([input_text])
+     padded_input = tf.keras.preprocessing.sequence.pad_sequences(encoded_input, maxlen=10, padding='post')
+     prediction = loaded_model.predict(padded_input)
+     if prediction > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_perceptron(text_input):
+     encoded_input = tokenizer_per.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = loaded_perceptron.predict(padded_input)
+     prediction_value = prediction[0]
+
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ def predict_backpropogation(text_input):
+     encoded_input = tokenizer_back.texts_to_sequences([text_input])
+     padded_input = pad_sequences(encoded_input, maxlen=200, padding='post')
+     prediction = lbackprop_model.predict(padded_input)
+     prediction_value = prediction[0]
+
+     # Adjust the threshold based on your model and problem
+     if prediction_value > 0.5:
+         return "Spam"
+     else:
+         return "Ham"
+
+
+ # Make a prediction with the CNN
+ def make_prediction_cnn(image, image_model):
+     # The tumor-detection model expects 128x128 RGB input
+     img = image.resize((128, 128))
+     img_array = np.array(img)
+     img_array = img_array.reshape((1, img_array.shape[0], img_array.shape[1], img_array.shape[2]))
+
+     preprocessed_image = preprocess_input(img_array)
+     prediction = image_model.predict(preprocessed_image)
+
+     if prediction > 0.5:
+         st.write("Tumor Detected")
+     else:
+         st.write("No Tumor")
+
+
+ if task == "Sentiment Classification":
+     st.subheader("Choose Model")
+     model_choice = st.radio("Select Model", ["DNN (Email)", "RNN (Email)", "Perceptron (Email)", "Backpropagation (Email)", "LSTM (Movie_Review)"])
+
+     st.subheader("Text Input")
+     text_input = st.text_area("Enter Text")
+
+     if st.button("Predict"):
+         if model_choice == "DNN (Email)":
+             if text_input:
+                 prediction_result = predict_dnn(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "RNN (Email)":
+             if text_input:
+                 prediction_result = predict_rnn(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "LSTM (Movie_Review)":
+             if text_input:
+                 prediction_result = predict_lstm(text_input)
+                 st.write(f"The sentiment is: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "Perceptron (Email)":
+             if text_input:
+                 prediction_result = predict_perceptron(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+         elif model_choice == "Backpropagation (Email)":
+             if text_input:
+                 prediction_result = predict_backpropogation(text_input)
+                 st.write(f"The message is classified as: {prediction_result}")
+             else:
+                 st.write("Please enter some text for prediction")
+
+ else:
+     st.subheader("Choose Model")
+     model_choice = st.radio("Select Model", ["CNN"])
+
+     st.subheader("Image Input")
+     image_input = st.file_uploader("Choose an image...", type="jpg")
+
+     if image_input is not None:
+         image = Image.open(image_input)
+         st.image(image, caption="Uploaded Image.", use_column_width=True)
+
+         if st.button("Predict"):
+             if model_choice == "CNN":
+                 make_prediction_cnn(image, image_model)
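To try the interface locally (assuming the .h5 and .pkl artifacts above sit in the working directory alongside BackPropogation.py and Perceptron.py, whose class definitions the pickled models need at load time), run `streamlit run app.py`.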
backprop_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3fab088872c073d72e358cb47f7c881045fd816657901ab1cae79d3e0bb98782
+ size 309
dnn_model_imdb.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a78ea55e7152f4d35222df74913180e8a04787fcef706410699ed55183e062b
+ size 17542648
lstm-code.py ADDED
@@ -0,0 +1,38 @@
+ # LSTM for sequence classification on the IMDB dataset
+ import tensorflow as tf
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense, LSTM, Embedding
+ from tensorflow.keras.preprocessing import sequence
+
+
+ # Fix the random seed for reproducibility
+ tf.random.set_seed(7)
+
+ # Load the dataset, keeping only the top n words
+ # (rarer words map to the out-of-vocabulary index)
+ top_words = 5000
+ (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
+
+ # Truncate and pad input sequences to a fixed length
+ max_review_length = 500
+ X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
+ X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
+
+ # Create the model
+ embedding_vector_length = 32
+ model = Sequential()
+ model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
+ model.add(LSTM(100))
+ model.add(Dense(1, activation='sigmoid'))
+ model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+ print(model.summary())
+ model.fit(X_train, y_train, epochs=3, batch_size=64)
+
+ # Final evaluation of the model
+ scores = model.evaluate(X_test, y_test, verbose=0)
+ print("Accuracy: %.2f%%" % (scores[1] * 100))
+
+
+ # Save the model
+ model.save('lstm_model.h5')
+ print("Model saved as 'lstm_model.h5'")
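A sketch (an editorial addition, not part of this commit) of scoring raw text with the saved model. The +3 shift is an assumption based on imdb.load_data's default index_from=3, which reserves indices 0-2 for padding, sequence start, and out-of-vocabulary words:

from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import sequence

model = load_model('lstm_model.h5')
word_index = imdb.get_word_index()

def encode(text, top_words=5000, maxlen=500):
    # Shift raw word_index values by 3 to match the training data's indexing;
    # unknown or out-of-range words fall back to the OOV index 2.
    ids = [word_index.get(w, -1) + 3 for w in text.lower().split()]
    ids = [i if 2 < i < top_words else 2 for i in ids]
    return sequence.pad_sequences([ids], maxlen=maxlen)

prob = model.predict(encode("a wonderful and moving film"))[0][0]
print("Positive" if prob > 0.5 else "Negative", f"({prob:.2f})")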
lstm_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1041af1fb0041169446751439f196b57baf0a41f7857c8d3f8db92a70d3177a2
+ size 2594296
perceptron_code.py ADDED
@@ -0,0 +1,85 @@
+ import numpy as np
+ from sklearn.metrics import accuracy_score, classification_report
+ import pickle
+ from tensorflow.keras.datasets import imdb
+ from tensorflow.keras.preprocessing import sequence
+
+
+ class Perceptron:
+     def __init__(self, input_size, epochs=100, learning_rate=0.01, activation_function='sigmoid'):
+         self.weights = np.zeros(input_size)  # the bias is kept as a separate scalar
+         self.bias = 0
+         self.epochs = epochs
+         self.learning_rate = learning_rate
+         self.activation_function = activation_function
+
+     def activate(self, x):
+         if self.activation_function == 'sigmoid':
+             return 1 / (1 + np.exp(-x))
+         elif self.activation_function == 'step':
+             return np.where(x >= 0, 1, 0)
+         else:
+             raise ValueError(f"Unsupported activation function: {self.activation_function}")
+
+     def fit(self, X, y):
+         for epoch in range(self.epochs):
+             for xi, target in zip(X, y):
+                 prediction = self.activate(np.dot(xi, self.weights) + self.bias)
+                 error = target - prediction
+                 self.weights += self.learning_rate * error * xi
+                 self.bias += self.learning_rate * error
+
+     def predict(self, X):
+         weighted_sum = np.dot(X, self.weights) + self.bias
+         return self.activate(weighted_sum)
+
+
+ def save_model(perceptron):
+     with open('perceptron_model.pkl', 'wb') as model_file:
+         pickle.dump(perceptron, model_file)
+
+
+ # Load the IMDB dataset
+ top_words = 5000
+ (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
+
+ # Labels are already binary (0 = negative, 1 = positive)
+ y_train = np.array(y_train)
+ y_test = np.array(y_test)
+
+ # Truncate and pad input sequences to a fixed length
+ max_review_length = 500
+ X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
+ X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
+
+ # Create and train the Perceptron
+ input_size = X_train.shape[1]
+ learning_rate = 0.01
+ perceptron = Perceptron(input_size=input_size, epochs=10, learning_rate=learning_rate)
+ perceptron.fit(X_train, y_train)
+
+ # Save the trained model
+ save_model(perceptron)
+
+ # Make predictions (the sigmoid activation returns probabilities)
+ pred = perceptron.predict(X_test)
+
+ # Threshold the probabilities to get binary predictions
+ threshold = 0.5
+ binary_predictions = (pred > threshold).astype(int)
+
+ # Now use binary_predictions for evaluation
+ print(f"Accuracy: {accuracy_score(y_test, binary_predictions)}")
+ report = classification_report(y_test, binary_predictions, digits=2)
+ print(report)
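A small reload sketch (an editorial addition, not part of this commit); because the model was pickled from this script, the Perceptron class above must be defined or importable in the process that unpickles it:

import pickle
import numpy as np

# Restore the trained perceptron (requires the Perceptron class definition)
with open('perceptron_model.pkl', 'rb') as f:
    clf = pickle.load(f)

sample = np.zeros((1, 500))  # one padded review of length 500 (all padding)
prob = clf.predict(sample)   # sigmoid activation returns a probability
print((prob > 0.5).astype(int))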
perceptron_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74426e092081537f0925df943428ca71166d55e055a2023821b26d37938fd42a
+ size 4300
spam_back.py ADDED
@@ -0,0 +1,73 @@
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, accuracy_score
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from BackPropogation import BackPropogation
+ from sklearn.preprocessing import LabelEncoder
+ import pickle
+
+ # Load the SMS Spam Collection dataset
+ sms_dataset_path = 'SMSSpamCollection.txt'
+ sms_data = []
+ sms_labels = []
+
+ with open(sms_dataset_path, 'r', encoding='utf-8') as file:
+     for line in file:
+         parts = line.strip().split('\t')
+         if len(parts) == 2:
+             label, message = parts
+             sms_labels.append(label)
+             sms_data.append(message)
+
+
+ # Use LabelEncoder to encode 'spam' and 'ham' into numerical values
+ label_encoder = LabelEncoder()
+ sms_labels = label_encoder.fit_transform(sms_labels)
+
+ # The BackPropogation class does not take input_size at initialization
+ backpropagation = BackPropogation(learning_rate=0.01, epochs=5)
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(sms_data, sms_labels, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length (pad_sequences already returns a 2-D array)
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+ # Train the Backpropagation model
+ backpropagation.fit(X_train_padded, y_train)
+
+ # Make predictions on the test set
+ predictions = backpropagation.predict(X_test_padded)
+
+ # Evaluate and print results
+ print("Backpropagation Classification Report:")
+ print(classification_report(y_test, predictions))
+ print("Backpropagation Accuracy:", accuracy_score(y_test, predictions))
+
+
+ # Save the trained Backpropagation model using pickle
+ backpropagation_model_path = 'spam_backpropagation_model.pkl'
+ with open(backpropagation_model_path, 'wb') as model_file:
+     pickle.dump(backpropagation, model_file)
+
+ # Save the tokenizer using pickle
+ tokenizer_path = 'tokenizer_backpropagation.pkl'
+ with open(tokenizer_path, 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
spam_backpropagation_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7eb020c3d330de0ff75a778302b6c21740487d106040d0a5190a7c4ac5f5902
+ size 1896
spam_dnn_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:523a4bef4281cf072886092b0e36f29e553fc2220bb554ed1501e2d8ed718103
+ size 17542648
spam_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8685b0df3fbe649818311e74aefef8062f3dc503661fbd243687969e1544f712
+ size 2269016
spam_perceptron.py ADDED
@@ -0,0 +1,70 @@
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, accuracy_score
+ from sklearn.preprocessing import LabelEncoder
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from Perceptron import Perceptron
+ import pickle
+
+ # Load the SMS Spam Collection dataset
+ sms_dataset_path = 'SMSSpamCollection.txt'
+ sms_data = []
+ sms_labels = []
+
+ with open(sms_dataset_path, 'r', encoding='utf-8') as file:
+     for line in file:
+         parts = line.strip().split('\t')
+         if len(parts) == 2:
+             label, message = parts
+             sms_labels.append(label)
+             sms_data.append(message)
+
+ # Encode 'spam' and 'ham' into numerical values
+ label_encoder = LabelEncoder()
+ sms_labels = label_encoder.fit_transform(sms_labels)
+
+ # Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(sms_data, sms_labels, test_size=0.2, random_state=42)
+
+ # Tokenize the text data
+ max_words = 10000
+ tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
+ tokenizer.fit_on_texts(X_train)
+ sequences_train = tokenizer.texts_to_sequences(X_train)
+ sequences_test = tokenizer.texts_to_sequences(X_test)
+
+ # Pad sequences to a fixed length
+ max_sequence_length = 200
+ X_train_padded = pad_sequences(sequences_train, maxlen=max_sequence_length, padding='post')
+ X_test_padded = pad_sequences(sequences_test, maxlen=max_sequence_length, padding='post')
+
+
+ # Create and train the Perceptron using the Perceptron class above
+ perceptron = Perceptron(learning_rate=0.01, epochs=100, activation_function='step')
+ perceptron.fit(X_train_padded, y_train)
+
+ # Make predictions on the test set
+ predictions = perceptron.predict(X_test_padded)
+
+ # Evaluate and print results
+ print("Perceptron Classification Report:")
+ print(classification_report(y_test, predictions))
+ print("Perceptron Accuracy:", accuracy_score(y_test, predictions))
+
+
+ # Save the trained Perceptron model using pickle
+ perceptron_model_path = 'spam_perceptron_model.pkl'
+ with open(perceptron_model_path, 'wb') as model_file:
+     pickle.dump(perceptron, model_file)
+
+ # Save the tokenizer using pickle
+ tokenizer_path = 'tokenizer_per.pkl'
+ with open(tokenizer_path, 'wb') as tokenizer_file:
+     pickle.dump(tokenizer, tokenizer_file)
spam_perceptron_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33f1f15d4fb4fe21547d66bfaf1d2d0a82228630743b5c478e1ef28e9ed763c7
+ size 1063
tokenizer_backpropagation.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:521eb792bc61700632c9c1a0fe64c5361ae455394e4bb1707f4e599ac8d6407d
+ size 309811
tokenizer_dnn.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:abe0e18829be8b6a19fd30f5335eab0a994003e322a90d9b10a30699e5b8ae6b
+ size 309811
tokenizer_per.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81ad96b864dc27f70878838b38ada5342581533fa5279531a75a2d5d56ac7041
+ size 309811
tokenizer_rnn.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1acba054c3040886e224a36ca905a2121a85bbf65a9aa52f2707227829480bdc
+ size 290462
tumor_detection_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95e3cd70401c053d4f32ca737a74f097d3877e4d4244480c230a6b43c7e4eba0
+ size 391811360