"""Streamlit demo that classifies a review with three sentiment models.

The page asks for a review text and, once one is entered, shows the label
and the inference time produced by:

* a TF-IDF vectorizer followed by logistic regression,
* an LSTM classifier (``dopset.LSTMClassifier``),
* DistilBERT embeddings followed by a pickled classifier.

A static table with the F1-score of each model is rendered as well.

NOTE: every ``*.pkl`` artefact is read with ``pickle``; only load files that
come from a trusted source, because unpickling can execute arbitrary code.
"""
import pickle
import time

import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from transformers import DistilBertModel, DistilBertTokenizer

from dopset import (LSTMClassifier, preprocess_single_string)


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def _verdict(is_positive, started, finished):
    """Return ``(label, elapsed_seconds)`` for one prediction.

    The elapsed time is ``finished - started`` rounded to 5 decimal places.
    """
    label = "положительный" if is_positive else "Негативный"
    return label, round(finished - started, 5)


@st.cache_resource
def _load_bert_resources():
    """Load the BERT-side artefacts once per server process.

    Returns:
        A ``(classifier, encoder, tokenizer)`` tuple: the pickled classifier
        from ``BertWeight/log.pkl``, the DistilBERT model stored under
        ``BertWeight/pt_save_pretrained`` and the
        ``distilbert-base-uncased`` tokenizer.
    """
    classifier = _load_pickle('BertWeight/log.pkl')
    encoder = DistilBertModel.from_pretrained("BertWeight/pt_save_pretrained")
    bert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    return classifier, encoder, bert_tokenizer


# Streamlit re-executes the script on every interaction; going through the
# cached loader keeps these module-level names without reloading the models.
loaded, model_BERT, tokenizer = _load_bert_resources()

st.title('Отзывы')


@st.cache_resource
def load_model_and_vectorizer():
    """Load the TF-IDF vectorizer and the logistic-regression model.

    Returns:
        A ``(vectorizer, model)`` tuple unpickled from the ``tfidf/``
        directory. The result is cached by Streamlit.
    """
    loaded_vectorizer = _load_pickle('tfidf/tfidf_vectorizer.pkl')
    loaded_model = _load_pickle('tfidf/logistic_regression_model.pkl')
    return loaded_vectorizer, loaded_model


@st.cache_resource
def _load_lstm():
    """Build the LSTM classifier, load its weights and cache the result."""
    embedding_dim = 64
    hidden_dim = 16
    device = 'cpu'
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_size=hidden_dim).to(device)
    model.load_state_dict(torch.load('lstm/lstm_weights.pt', map_location=device))
    # Inference only: disable train-time behaviour such as dropout.
    # (Assumes LSTMClassifier is a torch.nn.Module -- it is used like one.)
    model.eval()
    return model


def logreg(text: str):
    """Classify *text* with TF-IDF + logistic regression.

    Args:
        text: The review to classify.

    Returns:
        ``(label, seconds)`` where ``label`` is "положительный" when the
        model predicts class ``1`` and "Негативный" otherwise, and
        ``seconds`` is the time spent on vectorizing and predicting.
    """
    weight_vect, weight_model = load_model_and_vectorizer()
    start_time = time.time()
    features = weight_vect.transform([text])
    output = weight_model.predict(features)
    end_time = time.time()
    # A single document was passed in, so there is exactly one prediction.
    return _verdict(output[0] == 1, start_time, end_time)


def lstm_(text: str):
    """Classify *text* with the LSTM model.

    Args:
        text: The review to classify.

    Returns:
        ``(label, seconds)``; the label is "положительный" when the rounded
        sigmoid of the model output equals ``1`` and "Негативный" otherwise.
        Model loading is not included in the measured time.
    """
    device = 'cpu'
    model = _load_lstm()
    start_time = time.time()
    with torch.no_grad():  # no gradients are needed for prediction
        batch = preprocess_single_string(text, seq_len=128).unsqueeze(0).to(device)
        pred = model(batch).sigmoid().round().item()
    end_time = time.time()
    return _verdict(pred == 1, start_time, end_time)


def bert_(text: str, model, loaded_model):
    """Classify *text* using DistilBERT features and a fitted classifier.

    Args:
        text: The review to classify.
        model: Encoder called on the token ids; element ``0`` of its output
            is indexed as ``[:, 0, :]`` to obtain the feature vector.
        loaded_model: Object whose ``predict`` method receives the feature
            array and returns a single prediction.

    Returns:
        ``(label, seconds)``; the label is "положительный" when the
        prediction equals ``1`` and "Негативный" otherwise. Tokenization is
        included in the measured time.
    """
    start_time = time.time()
    # Uses the module-level ``tokenizer``; input is truncated to 64 tokens.
    tokenized_text = tokenizer.encode(
        text, add_special_tokens=True, truncation=True, max_length=64
    )
    input_ids = torch.tensor(tokenized_text).unsqueeze(0)
    with torch.no_grad():
        outputs = model(input_ids)
    # Keep only the first token position of every sequence as the features.
    vectors = outputs[0][:, 0, :].detach().cpu().numpy()
    prediction = loaded_model.predict(vectors).item()
    end_time = time.time()
    return _verdict(prediction == 1, start_time, end_time)


def _show_result(heading, rate, elapsed):
    """Render one model's section: heading, predicted label and timing."""
    st.markdown(heading)
    st.write('Отзыв:', rate)
    st.write('Время:', elapsed)


table_f1 = {'Model': ['Tf-IDF + logreg', 'LSTM', 'Bert'],
            'F1-score': ['0.91', '0.94', '0.74']}
df = pd.DataFrame(table_f1)

text = st.text_input("Напишите отзыв")

if text:
    rate, time_ = logreg(text)
    _show_result('### Tf-IDF + logreg', rate, time_)

    rate_lstm, time_lstm = lstm_(text)
    _show_result('### LSTM', rate_lstm, time_lstm)

    rate_bert, time_bert = bert_(text, model_BERT, loaded)
    _show_result('### BERT', rate_bert, time_bert)

# NOTE(review): the original formatting was lost, so it is unclear whether the
# table belonged inside the ``if text:`` branch; it is rendered unconditionally
# here -- confirm against the intended layout.
st.table(df)