"""Train a small dense sentiment classifier on the IMDB reviews CSV.

The script reads ``IMDB Dataset.csv`` (columns ``review`` and ``sentiment``),
tokenizes the reviews, trains an Embedding -> Flatten -> Dense network,
reports accuracy on a held-out test split, saves the tokenizer and the model,
and finally prints a prediction for one sample review.
"""
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the IMDB dataset from a CSV file
path_to_csv = 'IMDB Dataset.csv'
df = pd.read_csv(path_to_csv)
reviews = df['review'].values
labels = df['sentiment'].values

# Convert string labels to numerical values.
# A label other than 'positive'/'negative' (case-insensitive) raises KeyError.
label_encoder = {'positive': 1, 'negative': 0}
y = np.array([label_encoder[label.lower()] for label in labels])

# Hold out a test set BEFORE fitting the tokenizer so neither the vocabulary
# nor the model ever sees the reviews used for the final accuracy figure.
# Stratifying keeps the positive/negative ratio the same in both splits.
reviews_train, reviews_test, y_train, y_test = train_test_split(
    reviews, y, test_size=0.2, random_state=42, stratify=y
)

# Tokenize the text data (vocabulary is learned from the training split only).
max_words = 10000
# An explicit marker keeps out-of-vocabulary words visible when sequences are
# decoded; an empty-string token would silently vanish from decoded text.
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(reviews_train)

# Pad sequences to a fixed length
max_review_length = 200
maxlen = max_review_length  # single source of truth for the sequence length
x_train = pad_sequences(
    tokenizer.texts_to_sequences(reviews_train), maxlen=max_review_length
)
x_test = pad_sequences(
    tokenizer.texts_to_sequences(reviews_test), maxlen=max_review_length
)

# Model building
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=64, input_length=maxlen))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Training (20% of the training split is used for per-epoch validation)
print("Training started...")
history = model.fit(
    x_train, y_train, epochs=3, batch_size=16, validation_split=0.2
)

# Evaluate on the held-out split, so the printed number is a real test accuracy
# rather than accuracy on data the model was trained on.
loss, acc = model.evaluate(x_test, y_test)
print("Training finished.")
print(f'Test Accuracy: {round(acc*100)}%')

# Save the tokenizer so inference code can reproduce the same word indices
with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Save the model
model.save('dnn_model_imdb.h5')
print("Model saved as 'dnn_model_imdb.h5'")

# Example: Make a prediction on a movie review
sample_review = "I really enjoyed the movie. The plot was engaging, and the acting was superb."
sample_sequence = tokenizer.texts_to_sequences([sample_review])
padded_sample = pad_sequences(sample_sequence, maxlen=max_review_length)
prediction = model.predict(padded_sample)

# 0.5 is the cut-off the 'accuracy' metric applies to a sigmoid output, so the
# label printed here is consistent with the accuracy reported above.
decision_threshold = 0.5
positive_probability = prediction[0][0]
sentiment = "Positive" if positive_probability > decision_threshold else "Negative"
print(f'Predicted Sentiment: {sentiment} (Probability: {positive_probability:.2f})')