"""Train and evaluate a BackPropogation classifier on the SMS Spam Collection.

Pipeline, in order:
  1. Read tab-separated ``label<TAB>message`` lines from ``SMSSpamCollection.txt``.
  2. Encode the string labels as integers with ``LabelEncoder``.
  3. Split 80/20 into train/test sets (``random_state=42``).
  4. Tokenize the messages (vocabulary fitted on the training split only) and
     post-pad every sequence to a fixed length.
  5. Fit the model, print a classification report and the accuracy.
  6. Pickle the trained model and the fitted tokenizer.
"""
import pickle

import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

from BackPropogation import BackPropogation

# Load the SMS Spam Collection dataset
sms_dataset_path = 'SMSSpamCollection.txt'
sms_data = []
sms_labels = []

with open(sms_dataset_path, 'r', encoding='utf-8') as file:
    for line in file:
        parts = line.strip().split('\t')
        # Only well-formed "label<TAB>message" rows are kept; anything else
        # (blank lines, rows with extra tabs) is skipped.
        if len(parts) == 2:
            label, message = parts
            sms_labels.append(label)
            sms_data.append(message)

# Use LabelEncoder to turn the string labels (expected: 'ham' / 'spam')
# into numerical values
label_encoder = LabelEncoder()
sms_labels = label_encoder.fit_transform(sms_labels)

# Assumes the BackPropogation class does not require input_size during
# initialization -- TODO confirm against the BackPropogation module.
backpropagation = BackPropogation(learning_rate=0.01, epochs=5)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    sms_data, sms_labels, test_size=0.2, random_state=42
)

# Tokenize the text data. The tokenizer is fitted on the training split only
# and then reused unchanged for the test split.
# NOTE(review): the empty-string OOV token looks like a placeholder such as
# '<OOV>' that lost its angle brackets -- confirm the intended value.
max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token='')
tokenizer.fit_on_texts(X_train)

sequences_train = tokenizer.texts_to_sequences(X_train)
sequences_test = tokenizer.texts_to_sequences(X_test)

# Pad sequences to a fixed length
max_sequence_length = 200
X_train_padded = pad_sequences(
    sequences_train, maxlen=max_sequence_length, padding='post'
)
X_test_padded = pad_sequences(
    sequences_test, maxlen=max_sequence_length, padding='post'
)

# Flatten each sample to a single row. The same reshape is applied to both
# splits so that fit() and predict() receive inputs with the same layout.
X_train_flatten = X_train_padded.reshape((X_train_padded.shape[0], -1))
X_test_flatten = X_test_padded.reshape((X_test_padded.shape[0], -1))

# Train the Backpropagation model
backpropagation.fit(X_train_flatten, y_train)

# Make predictions on the test set
predictions = backpropagation.predict(X_test_flatten)

# Evaluate and print results
print("Backpropagation Classification Report:")
print(classification_report(y_test, predictions))
print("Backpropagation Accuracy:", accuracy_score(y_test, predictions))

# Save the trained Backpropagation model using pickle
backpropagation_model_path = 'spam_backpropagation_model.pkl'
with open(backpropagation_model_path, 'wb') as model_file:
    pickle.dump(backpropagation, model_file)

# Save the tokenizer using pickle so inference can reuse the same vocabulary
tokenizer_path = 'tokenizer_backpropagation.pkl'
with open(tokenizer_path, 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)