Spaces:
Running
Running
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D | |
from tensorflow.keras.callbacks import EarlyStopping | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score, confusion_matrix | |
from tensorflow.keras.datasets import imdb | |
# Hyperparameters
vocab_size = 5000  # number of most-frequent IMDB words kept; also the Embedding input_dim
# NOTE(review): max_words is not used anywhere in the visible script and does
# not match the vocabulary size actually used (5000) -- kept only so that any
# external reference keeps working; confirm and remove.
max_words = 500
max_len = 500  # every review is padded/truncated to this many tokens
embedding_dim = 128


# Streamlit re-executes this script from top to bottom on every widget
# interaction. Without caching, each keystroke or button click would reload the
# dataset and retrain the LSTM, so the expensive steps live in cached helpers.
@st.cache_resource
def load_padded_imdb(num_words, maxlen):
    """Load the IMDB review dataset and pad every review to a fixed length.

    Args:
        num_words: Vocabulary cutoff passed to ``imdb.load_data``.
        maxlen: Target sequence length passed to ``pad_sequences``.

    Returns:
        ``((train_x, train_y), (test_x, test_y))`` where the ``*_x`` arrays
        have been padded with ``pad_sequences``.
    """
    (train_x, train_y), (test_x, test_y) = imdb.load_data(num_words=num_words)
    return (
        (pad_sequences(train_x, maxlen=maxlen), train_y),
        (pad_sequences(test_x, maxlen=maxlen), test_y),
    )


@st.cache_resource
def train_sentiment_model(num_words, maxlen, embed_dim):
    """Build, train and evaluate the LSTM sentiment classifier once per process.

    Args:
        num_words: Vocabulary size (Embedding ``input_dim``).
        maxlen: Padded sequence length (Embedding ``input_length``).
        embed_dim: Embedding output dimension.

    Returns:
        ``(model, history, test_loss, test_accuracy)`` -- the fitted model, the
        object returned by ``model.fit``, and the two values returned by
        ``model.evaluate`` on the test split.
    """
    (train_x, train_y), (test_x, test_y) = load_padded_imdb(num_words, maxlen)

    # Build the Model
    net = Sequential()
    net.add(Embedding(input_dim=num_words, output_dim=embed_dim, input_length=maxlen))
    net.add(SpatialDropout1D(0.2))
    net.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the Model: hold out 20% of the training data for validation and
    # stop early once validation loss has not improved for 3 epochs.
    fit_x, val_x, fit_y, val_y = train_test_split(
        train_x, train_y, test_size=0.2, random_state=42
    )
    stopper = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    fit_history = net.fit(
        fit_x,
        fit_y,
        epochs=10,
        batch_size=64,
        validation_data=(val_x, val_y),
        callbacks=[stopper],
    )

    # Evaluate the Model
    test_loss, test_accuracy = net.evaluate(test_x, test_y)
    return net, fit_history, test_loss, test_accuracy


# Load the dataset (cached) and obtain the trained model (cached)
(X_train, y_train), (X_test, y_test) = load_padded_imdb(vocab_size, max_len)
model, history, loss, accuracy = train_sentiment_model(vocab_size, max_len, embedding_dim)
st.write(f'Test Accuracy: {accuracy:.4f}')
# Plotting functions | |
def plot_accuracy(history):
    """Render training vs. validation accuracy per epoch in the Streamlit app.

    Args:
        history: Object whose ``history`` attribute maps ``'accuracy'`` and
            ``'val_accuracy'`` to per-epoch sequences of values.
    """
    # Draw on a dedicated figure. Passing the global ``plt`` module to
    # ``st.pyplot`` leaves the shared figure uncleared, so curves from other
    # plots (and from earlier script reruns) would pile up on the same axes.
    fig, ax = plt.subplots()
    ax.plot(history.history['accuracy'])
    ax.plot(history.history['val_accuracy'])
    ax.set_title('Model accuracy')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    plt.close(fig)  # release the figure so figures do not accumulate across reruns
def plot_loss(history):
    """Render training vs. validation loss per epoch in the Streamlit app.

    Args:
        history: Object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch sequences of values.
    """
    # Use a fresh figure instead of the implicit global one: otherwise this
    # chart is drawn on top of whatever was plotted before it (the legend
    # would then label the wrong curves), because ``st.pyplot(plt)`` does not
    # clear the shared figure.
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    plt.close(fig)  # release the figure so figures do not accumulate across reruns
# Display plots
plot_accuracy(history)
plot_loss(history)

# Text Input and Prediction
st.header("Movie Review Sentiment Analysis")
review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")


@st.cache_resource
def load_imdb_word_index():
    """Return the IMDB ``word -> frequency rank`` mapping, fetched once per process."""
    return imdb.get_word_index()


def encode_review(text, word_index, num_words=5000, index_from=3, start_char=1, oov_char=2):
    """Encode raw review text with the same integer scheme as ``imdb.load_data``.

    The model was trained on sequences produced by ``imdb.load_data``, so new
    text must be mapped through the IMDB word index. Fitting a fresh tokenizer
    on the input itself would assign unrelated indices and make the prediction
    meaningless.

    Args:
        text: Raw review text.
        word_index: Mapping from lowercase word to its 1-based frequency rank.
        num_words: Vocabulary cutoff; encoded indices at or above it become
            ``oov_char``.
        index_from: Offset added to every rank (``imdb.load_data`` default).
        start_char: Index prepended to mark the start of a sequence.
        oov_char: Index used for unknown or out-of-vocabulary words.

    Returns:
        A list of integer token indices beginning with ``start_char``.
    """
    # Same punctuation set Keras' text tokenizer strips by default; the
    # apostrophe is deliberately kept so contractions stay single tokens.
    filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    to_space = str.maketrans(filters, " " * len(filters))

    encoded = [start_char]
    for token in text.lower().translate(to_space).split():
        rank = word_index.get(token)
        index = oov_char if rank is None else rank + index_from
        encoded.append(index if index < num_words else oov_char)
    return encoded


# Encoding and padding (5000 matches the num_words the model was trained with)
review_seq = [encode_review(review_input, load_imdb_word_index(), num_words=5000)]
review_pad = pad_sequences(review_seq, maxlen=max_len)

# Prediction
if st.button("Classify Review"):
    if not review_input.strip():
        # Nothing to classify: avoid reporting a sentiment for empty input.
        st.warning("Please enter a review to classify.")
    else:
        prediction = (model.predict(review_pad) > 0.5).astype("int32")
        sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
        st.write(f'Sentiment: **{sentiment}**')