"""Streamlit app: sentiment analysis of movie reviews with two models.

Model 1 is a pickled estimator loaded from ``sentiment_model.pkl`` (labelled
"SGD Classifier" in the UI); Model 2 is a ``CNN`` whose weights are loaded
from ``model_cnn.pkl``.
"""
import pickle

import nltk
import streamlit as st
import torch
from datasets import load_dataset
from nltk.corpus import stopwords
from torchtext.data.utils import get_tokenizer

from functions_preprocess import CNN, LinguisticPreprocessor, download_if_non_existent
# NOTE(review): the original script called build_vocab() without defining or
# importing it anywhere (NameError at start-up). It is assumed to live next to
# CNN in functions_preprocess -- confirm, or point this import at the module
# that actually defines it.
from functions_preprocess import build_vocab

nltk.download('stopwords')
nltk.download('punkt')
download_if_non_existent('corpora/stopwords', 'stopwords')
download_if_non_existent('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger')
download_if_non_existent('corpora/wordnet', 'wordnet')

# The CNN weights are loaded with map_location=cpu, so inference tensors must
# be placed on the CPU as well.
device = torch.device('cpu')

#################################################################### Streamlit interface
st.title("Movie Reviews: An NLP Sentiment analysis")


#################################################################### Cache the model loading
# st.cache_resource is used for model objects: it hands back the same object
# on every rerun instead of pickling/copying it as st.cache_data would.
@st.cache_resource
def load_model():
    """Load the pickled Model 1 estimator from ``sentiment_model.pkl``."""
    model_pkl_file = "sentiment_model.pkl"
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only ship model files from a trusted source.
    with open(model_pkl_file, 'rb') as file:
        model = pickle.load(file)
    return model


@st.cache_resource
def load_cnn():
    """Build the CNN, load its weights from ``model_cnn.pkl`` and set eval mode."""
    # The positional hyper-parameters must match the ones the checkpoint was
    # saved with, otherwise load_state_dict fails on mismatched shapes.
    model = CNN(16236, 300, 128, [3, 8], 0.5, 2)
    model.load_state_dict(torch.load('model_cnn.pkl', map_location=torch.device('cpu')))
    model.eval()
    return model


def predict_sentiment(text, model, vocab=None, torch_text=False):
    """Predict the sentiment of ``text`` with ``model``.

    Args:
        text: Raw review text.
        model: Either an estimator exposing ``predict`` (``torch_text=False``)
            or a torch module (``torch_text=True``).
        vocab: Token-to-index mapping; required only when ``torch_text`` is
            true. Defaults to ``None`` so the estimator path can be called
            without it.
        torch_text: When true, tokenize ``text``, encode it with ``vocab`` and
            run the torch model.

    Returns:
        With ``torch_text`` true, the predicted class index as an ``int``;
        otherwise whatever ``model.predict([text])`` returns.

    Raises:
        ValueError: If ``torch_text`` is true and ``vocab`` is ``None``.
    """
    # NOTE(review): the return value of transform() is discarded (as in the
    # original), so the preprocessing does not reach either model. Confirm
    # whether transform() expects a single string or an iterable of documents
    # before wiring its output in.
    processor.transform(text)

    if not torch_text:
        return model.predict([text])

    if vocab is None:
        raise ValueError("vocab is required when torch_text=True")

    tokenize = get_tokenizer("basic_english")
    encoded = [vocab[token] for token in tokenize(text)]
    # unsqueeze(0) adds the batch dimension in front of the token indices.
    input_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0).to(device)

    with torch.no_grad():  # No gradient needed
        model.eval()  # Evaluation mode
        outputs = model(input_tensor)
        probs = torch.softmax(outputs, dim=1)
        pred_class = torch.argmax(probs, dim=1).item()
    return pred_class  # Return the predicted class index


@st.cache_resource
def _load_vocab():
    """Load the rotten_tomatoes train split and build the vocab once per process."""
    data = load_dataset('rotten_tomatoes', split='train')
    return data, build_vocab(data)


def _render_model_section(title, key_prefix, predict):
    """Render one expander holding a text box, an Analyze button and the verdict.

    Args:
        title: Expander label.
        key_prefix: Prefix for the widget keys (``<prefix>_input`` and
            ``<prefix>_button``) so the two sections do not collide.
        predict: Callable mapping the entered text to a value compared
            against 0.5.
    """
    with st.expander(title):
        st.markdown("Give it a go by writing a positive or negative text, and analyze it!")
        # Text input inside the expander
        user_input = st.text_area("Enter text here...", key=f'{key_prefix}_input')
        if not st.button('Analyze', key=f'{key_prefix}_button'):
            return
        if not user_input.strip():
            # Nothing to classify; avoid feeding an empty sequence to the models.
            st.warning('Please enter some text to analyze.')
            return
        # Displaying output
        # st.write takes no `key`; extra keyword arguments are deprecated.
        result = predict(user_input)
        if result >= 0.5:
            st.write('The sentiment is: Positive 😀')
        else:
            st.write('The sentiment is: Negative 😞')


model_1 = load_model()
model_2 = load_cnn()
processor = LinguisticPreprocessor()
train_data, (vocab, tokenizer) = _load_vocab()

############################################################# Text input
_render_model_section(
    "Model 1: SGD Classifier",
    'model1',
    lambda text: predict_sentiment(text, model_1),
)
_render_model_section(
    "Model 2: CNN Sentiment analysis",
    'model2',
    lambda text: predict_sentiment(text, model_2, vocab, torch_text=True),
)

st.caption("Por @efeperro.")

stop_words = set(stopwords.words('english'))