"""Streamlit front end for classifying a review as positive or negative.

Three interchangeable back ends are offered in the sidebar: a logistic
regression over a pickled vectorizer, an LSTM-with-attention network and a
fine-tuned ``cointegrated/rubert-tiny2`` model.
"""

import json
import pickle

import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import transformers

from model.funcs import (
    create_model_and_tokenizer,
    execution_time,
    load_model,
    predict_sentiment,
)
from model.model import LSTMConcatAttentionEmbed
from preprocessing.preprocessing import data_preprocessing
from preprocessing.rnn_preprocessing import preprocess_single_string


@st.cache_resource
def load_logreg():
    """Load the pickled vectorizer and logistic-regression predictor.

    Returns:
        A ``(vectorizer, predictor)`` tuple read from ``vectorizer.pkl`` and
        ``logreg_model.pkl`` in the working directory.
    """
    # NOTE: unpickling executes arbitrary code; these artifacts must only ever
    # come from a trusted source (they are shipped with the app).
    with open("vectorizer.pkl", "rb") as f:
        logreg_vectorizer = pickle.load(f)
    with open("logreg_model.pkl", "rb") as f:
        logreg_predictor = pickle.load(f)
    return logreg_vectorizer, logreg_predictor


logreg_vectorizer, logreg_predictor = load_logreg()


@st.cache_resource
def load_lstm():
    """Load the LSTM vocabularies and the trained model weights.

    Returns:
        A ``(vocab_to_int, int_to_vocab, model)`` tuple, where the two
        vocabularies are the parsed JSON files and ``model`` is an
        ``LSTMConcatAttentionEmbed`` with its state dict restored.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which differs on Windows.
    with open("model/vocab.json", "r", encoding="utf-8") as f:
        vocab_to_int = json.load(f)
    with open("model/int_vocab.json", "r", encoding="utf-8") as f:
        int_to_vocab = json.load(f)

    model_concat_embed = LSTMConcatAttentionEmbed()
    # Inference below never moves tensors to an accelerator, so map the
    # checkpoint onto the CPU; without this, weights saved from a GPU run fail
    # to deserialize on a CPU-only host.
    state_dict = torch.load("model/model_weights.pt", map_location="cpu")
    model_concat_embed.load_state_dict(state_dict)
    return vocab_to_int, int_to_vocab, model_concat_embed


vocab_to_int, int_to_vocab, model_concat_embed = load_lstm()


@st.cache_resource
def load_bert():
    """Load the fine-tuned BERT classifier and its tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is built by the project
        helper ``load_model`` from the ``cointegrated/rubert-tiny2``
        checkpoint name and the weights file ``model/best_bert_weights.pth``;
        the tokenizer is fetched for the same checkpoint name.
    """
    model_class = transformers.AutoModel
    tokenizer_class = transformers.AutoTokenizer
    pretrained_weights = "cointegrated/rubert-tiny2"
    weights_path = "model/best_bert_weights.pth"

    model = load_model(model_class, pretrained_weights, weights_path)
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    return model, tokenizer


model, tokenizer = load_bert()


@execution_time
def plot_and_predict(
    review: str, SEQ_LEN: int, model: nn.Module, threshold: float = 0.75
):
    """Classify a review with the LSTM model.

    Args:
        review: Raw review text.
        SEQ_LEN: Sequence length passed to ``preprocess_single_string``.
        model: Network to run; called with a batch of one sequence and
            expected to return a pair whose first item is the logit.
        threshold: Sigmoid probability above which the review is labelled
            positive. Defaults to 0.75.

    Returns:
        ``1`` when the predicted probability exceeds ``threshold``, else
        ``0``. NOTE(review): the ``execution_time`` decorator is defined
        elsewhere and may alter what callers actually receive - confirm.
    """
    inp = preprocess_single_string(review, SEQ_LEN, vocab_to_int)
    model.eval()
    with torch.inference_mode():
        # The second output is unused here (presumably attention weights -
        # verify against the model definition).
        pred, _ = model(inp.long().unsqueeze(0))
    pred = pred.sigmoid().item()
    return 1 if pred > threshold else 0


def preprocess_text_logreg(text):
    """Clean ``text`` and turn it into features for the logistic regression.

    Args:
        text: Raw review text.

    Returns:
        The output of ``logreg_vectorizer.transform`` for the single cleaned
        document.
    """
    # Apply preprocessing steps (cleaning, tokenization, vectorization)
    clean_text = data_preprocessing(text)
    # NOTE(review): the join assumes data_preprocessing returns an iterable of
    # tokens; if it returns a plain string this would space out individual
    # characters - confirm against its implementation.
    vectorized_text = logreg_vectorizer.transform([" ".join(clean_text)])
    return vectorized_text


# Define function for making predictions
@execution_time
def predict_sentiment_logreg(text):
    """Predict the sentiment of ``text`` with the logistic regression.

    Args:
        text: Raw review text.

    Returns:
        Whatever ``logreg_predictor.predict`` returns for the single
        vectorized document.
    """
    # Preprocess input text
    processed_text = preprocess_text_logreg(text)
    # Make prediction
    prediction = logreg_predictor.predict(processed_text)
    return prediction


# Reference scores shown under the prediction widget.
metrics = {
    "Models": ["Logistic Regression", "LSTM + attention", "ruBERTtiny2"],
    "f1-macro score": [0.94376, 0.93317, 0.94070],
}

# NOTE(review): the two columns are created but nothing is placed in them.
col1, col2 = st.columns([1, 3])
df = pd.DataFrame(metrics).set_index("Models").rename_axis("Model")

st.sidebar.title("Model Selection")
model_type = st.sidebar.radio("Select Model Type", ["Classic ML", "LSTM", "BERT"])
st.title("Review Prediction")

# Streamlit app code
st.title("Sentiment Analysis with Logistic Regression")
text_input = st.text_input("Enter your review:")

if st.button("Predict"):
    if not text_input.strip():
        # Running any of the models on an empty string yields a meaningless
        # label, so ask for input instead.
        st.warning("Please enter a review before predicting.")
    else:
        if model_type == "Classic ML":
            prediction = predict_sentiment_logreg(text_input)
        elif model_type == "LSTM":
            prediction = plot_and_predict(
                review=text_input, SEQ_LEN=25, model=model_concat_embed
            )
        elif model_type == "BERT":
            prediction = predict_sentiment(text_input, model, tokenizer, "cpu")

        # NOTE(review): the literal label "prediction" is written, not the
        # predicted value; kept as in the original UI.
        if prediction == 1:
            st.write("prediction")
            st.write("Отзыв положительный")
        elif prediction == 0:
            st.write("prediction")
            st.write("Отзыв отрицательный")

st.write(df)