# NOTE(review): the three lines below were Hugging Face Spaces page residue
# ("Spaces: / Sleeping / Sleeping") accidentally captured into the source file;
# commented out so the module parses.
import streamlit as st
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pickle
import time
from dopset import (LSTMClassifier, preprocess_single_string)
import numpy as np
import pandas as pd
from transformers import DistilBertModel, DistilBertTokenizer

# Module-level resources, loaded once at app start.
# NOTE(review): unpickling is only safe because this is a local project file;
# never pickle.load untrusted data.
with open('BertWeight/log.pkl', "rb") as _f:  # was pickle.load(open(...)) — leaked the handle
    loaded = pickle.load(_f)  # classifier applied to DistilBERT [CLS] vectors in bert_()
model_BERT = DistilBertModel.from_pretrained("BertWeight/pt_save_pretrained")
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

st.title('Отзывы')
def load_model_and_vectorizer():
    """Load (and memoize) the TF-IDF vectorizer and logistic-regression model.

    The original re-read both pickles from disk on every call — i.e. once per
    prediction, since logreg() calls this each time. The loaded pair is now
    cached on the function object after the first call.

    Returns:
        tuple: (tfidf_vectorizer, logistic_regression_model).
    """
    cached = getattr(load_model_and_vectorizer, "_cache", None)
    if cached is None:
        # `with` closes the file handles deterministically; the original
        # pickle.load(open(...)) pattern leaked them.
        with open('tfidf/tfidf_vectorizer.pkl', 'rb') as f:
            vectorizer = pickle.load(f)
        with open('tfidf/logistic_regression_model.pkl', 'rb') as f:
            model = pickle.load(f)
        cached = (vectorizer, model)
        load_model_and_vectorizer._cache = cached
    return cached
def logreg(text):
    """Classify *text* with the TF-IDF + logistic-regression pipeline.

    Args:
        text: Raw review string.

    Returns:
        tuple: (label, seconds) where label is "положительный" / "Негативный"
        and seconds is the transform+predict wall time rounded to 5 decimals.
    """
    weight_vect, weight_model = load_model_and_vectorizer()
    start_time = time.time()
    input_ids = weight_vect.transform([text])
    output = weight_model.predict(input_ids)
    end_time = time.time()
    elapsed = round(end_time - start_time, 5)
    # predict() returns an array; compare its single element rather than the
    # array itself (array-vs-scalar `==` yields an array, and 1-element-array
    # truthiness is deprecated behavior).
    if output[0] == 1:
        return "положительный", elapsed
    else:
        return "Негативный", elapsed
def lstm_(text):
    """Classify *text* with the LSTM model.

    The network is constructed and its weights loaded only once, then cached
    on the function object — the original rebuilt the model and re-read the
    weight file on every prediction.

    Args:
        text: Raw review string.

    Returns:
        tuple: (label, seconds) where label is "положительный" / "Негативный"
        and seconds is the forward-pass wall time rounded to 5 decimals.
    """
    EMBEDDING_DIM = 64
    HIDDEN_DIM = 16
    DEVICE = 'cpu'
    model = getattr(lstm_, "_model", None)
    if model is None:
        model = LSTMClassifier(embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_DIM).to(DEVICE)
        model.load_state_dict(torch.load('lstm/lstm_weights.pt', map_location=DEVICE))
        model.eval()  # inference mode: disables dropout etc., if the classifier uses any
        lstm_._model = model
    start_time = time.time()
    with torch.no_grad():  # no autograd bookkeeping needed for inference
        logits = model(preprocess_single_string(text, seq_len=128).unsqueeze(0).to(DEVICE))
        pred = logits.sigmoid().round().item()
    end_time = time.time()
    elapsed = round(end_time - start_time, 5)
    if pred == 1:
        return "положительный", elapsed
    else:
        return "Негативный", elapsed
def bert_(text, model, loaded_model):
    """Classify *text* via DistilBERT embeddings plus a fitted classifier head.

    Args:
        text: Raw review string.
        model: DistilBERT encoder; its first output holds the hidden states.
        loaded_model: Fitted scikit-learn classifier applied to the first
            (CLS-position) token vector.

    Returns:
        tuple: (label, seconds) where label is "положительный" / "Негативный"
        and seconds is the tokenize+encode+predict wall time, 5 decimals.
    """
    t0 = time.time()
    token_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=64)
    batch = torch.tensor(token_ids).unsqueeze(0)  # shape (1, seq_len)
    with torch.no_grad():
        hidden = model(batch)
    # First token's vector from the last hidden state, as a numpy array.
    cls_vector = hidden[0][:, 0, :].detach().cpu().numpy()
    label_id = loaded_model.predict(cls_vector).item()
    t1 = time.time()
    elapsed = round(t1 - t0, 5)
    return ("положительный" if label_id == 1 else "Негативный"), elapsed
# Reference F1 scores for the three models, rendered as a table below the UI.
table_f1 = {'Model': ['Tf-IDF + logreg', 'LSTM', 'Bert'],
            'F1-score': ['0.91', '0.94', '0.74']}
df = pd.DataFrame(table_f1)

text = st.text_input("Напишите отзыв")
if text:
    # Run all three classifiers on the same input and show label + latency.
    rate, time_ = logreg(text)
    st.markdown('### Tf-IDF + logreg')
    st.write('Отзыв:', rate)
    st.write('Время:', time_)

    rate_lstm, time_lstm = lstm_(text)
    st.markdown('### LSTM')
    st.write('Отзыв:', rate_lstm)
    st.write('Время:', time_lstm)

    rate_bert, time_bert = bert_(text, model_BERT, loaded)
    st.markdown('### BERT')
    st.write('Отзыв:', rate_bert)
    st.write('Время:', time_bert)

# Shown unconditionally — in the original this line sits outside the `if`.
st.table(df)