import streamlit as st
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle


max_sequence_length = 100
embedding_dim = 100
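# NOTE: these values are assumed to mirror the training configuration; the
# embedding size must match the saved weights, and the sequence length should
# match the padding used during training.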


def create_model(vocab_size):
    """
    Creates a Bidirectional LSTM model for sentiment analysis.

    Args:
        vocab_size: Size of the vocabulary (number of unique words + 1)

    Returns:
        Compiled Keras model
    """
    model = Sequential([
        Input(shape=(max_sequence_length,)),
        # Map token indices to dense vectors of size embedding_dim
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        # Read the sequence in both directions; keep only the final state
        Bidirectional(LSTM(128, return_sequences=False)),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        # Three output classes: Negative, Neutral, Positive
        Dense(3, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
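

# The app below expects two artifacts on disk: 'tokenizer.pickle' and
# 'lstm.keras'. How they were produced is not shown in this file; the sketch
# below is one plausible way to create them with create_model(). The corpus
# (texts), the one-hot labels, and the training parameters are assumptions,
# and this function is never called by the app itself.
def _train_and_export_sketch(texts, labels):
    """Illustrative only: fit the tokenizer and model, then save the files
    that load_model_and_tokenizer() expects."""
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    padded = pad_sequences(tokenizer.texts_to_sequences(texts),
                           maxlen=max_sequence_length)
    model = create_model(len(tokenizer.word_index) + 1)
    # labels must be one-hot encoded with 3 columns to match the softmax head
    model.fit(padded, labels, epochs=5, validation_split=0.1)
    model.save('lstm.keras')
    with open('tokenizer.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle)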


@st.cache_resource
def load_model_and_tokenizer():
    """
    Loads the pre-trained model and tokenizer.

    Returns:
        tuple: (model, tokenizer)
    """
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # Rebuild the architecture, then load the trained weights from 'lstm.keras'
    vocab_size = len(tokenizer.word_index) + 1
    model = create_model(vocab_size)
    model.load_weights('lstm.keras')
    return model, tokenizer


def preprocess_text(text, tokenizer):
    """
    Preprocesses input text for model prediction.

    Args:
        text: Input text string
        tokenizer: Keras tokenizer object

    Returns:
        Padded sequence ready for model input
    """
    sequences = tokenizer.texts_to_sequences([text])
    return pad_sequences(sequences, maxlen=max_sequence_length)
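
# Example (illustrative): preprocess_text("great service", tokenizer) returns an
# integer array of shape (1, max_sequence_length); by default pad_sequences
# pads and truncates at the start of the sequence.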


def main():
    """Main function for the Streamlit app."""
    st.title("Sentiment Analyzer")

    try:
        model, tokenizer = load_model_and_tokenizer()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return

    text = st.text_area("Enter text to analyze:", height=150)

    if st.button("Analyze"):
        if text.strip():
            # Tokenize, pad, and run the model on the single input text
            processed_text = preprocess_text(text, tokenizer)
            prediction = model.predict(processed_text)

            sentiments = ['Negative', 'Neutral', 'Positive']
            probabilities = prediction[0]
            result = sentiments[np.argmax(probabilities)]

            st.write(f"Detected sentiment: **{result}**")

            # Show the class probabilities as labelled progress bars
            for sent, prob in zip(sentiments, probabilities):
                st.write(f"{sent}: {prob:.2%}")
                st.progress(float(prob))
        else:
            st.warning("Please enter text to analyze.")


if __name__ == "__main__":
    main()
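

# To launch the app from a terminal (script name assumed):
#   streamlit run sentiment_app.py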