# new_space_nlp / pages / TaskOne.py
# Teery's picture
# dopset and TaskOne
# d128c4f
import streamlit as st
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pickle
import time
from dopset import (LSTMClassifier, preprocess_single_string)
import numpy as np
import pandas as pd
from transformers import DistilBertModel, DistilBertTokenizer
@st.cache_resource
def _load_bert_resources():
    """Load the BERT-pipeline artifacts once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the loader keeps the pickle, the DistilBERT weights and the tokenizer
    from being re-read on each rerun.

    Returns:
        A ``(classifier, encoder, bert_tokenizer)`` tuple: the object
        unpickled from ``BertWeight/log.pkl``, the ``DistilBertModel`` loaded
        from ``BertWeight/pt_save_pretrained`` and the
        ``distilbert-base-uncased`` tokenizer.
    """
    # NOTE: pickle.load executes arbitrary code from the file; only ship
    # artifacts that were produced by a trusted training run.
    with open('BertWeight/log.pkl', "rb") as classifier_file:
        classifier = pickle.load(classifier_file)
    encoder = DistilBertModel.from_pretrained("BertWeight/pt_save_pretrained")
    bert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    return classifier, encoder, bert_tokenizer


# Module-level names are kept because the rest of the page refers to them.
loaded, model_BERT, tokenizer = _load_bert_resources()
st.title('Отзывы')
@st.cache_resource
def load_model_and_vectorizer():
    """Load the pickled vectorizer and model from the ``tfidf/`` directory.

    The result is cached by Streamlit, so the files are read only once per
    server process rather than on every script rerun.

    Returns:
        A ``(vectorizer, model)`` tuple with the objects unpickled from
        ``tfidf/tfidf_vectorizer.pkl`` and
        ``tfidf/logistic_regression_model.pkl``.
    """
    # Context managers close the file handles deterministically; the bare
    # ``pickle.load(open(...))`` form leaves them to the garbage collector.
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # artifacts from a trusted source.
    with open('tfidf/tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        loaded_vectorizer = pickle.load(vectorizer_file)
    with open('tfidf/logistic_regression_model.pkl', 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    return loaded_vectorizer, loaded_model
def logreg(text):
    """Classify a review with the vectorizer + model pair and time it.

    Args:
        text: The review text to classify.

    Returns:
        A ``(label, seconds)`` tuple. ``label`` is "положительный" when the
        model predicts class 1 and "Негативный" otherwise; ``seconds`` is the
        time spent in ``transform`` + ``predict``, rounded to 5 decimals.
    """
    weight_vect, weight_model = load_model_and_vectorizer()

    # perf_counter is monotonic and high-resolution; time.time() is a
    # wall-clock that can be too coarse for sub-millisecond predictions.
    start_time = time.perf_counter()
    features = weight_vect.transform([text])
    output = weight_model.predict(features)
    elapsed = round(time.perf_counter() - start_time, 5)

    # A single document was passed in, so the prediction of interest is the
    # first (and only) element.
    label = "положительный" if output[0] == 1 else "Негативный"
    return label, elapsed
@st.cache_resource
def _load_lstm_model():
    """Build the LSTM classifier, load its weights and switch it to eval mode.

    Cached by Streamlit so the weights file is read once per server process
    instead of on every call to ``lstm_``.
    """
    embedding_dim = 64
    hidden_dim = 16
    device = 'cpu'
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_size=hidden_dim).to(device)
    model.load_state_dict(torch.load('lstm/lstm_weights.pt', map_location=device))
    # Inference only: disable training-time behaviour (e.g. dropout) so the
    # same text always yields the same prediction.
    model.eval()
    return model


def lstm_(text):
    """Classify a review with the LSTM model and time the prediction.

    Args:
        text: The review text to classify.

    Returns:
        A ``(label, seconds)`` tuple. ``label`` is "положительный" when the
        rounded sigmoid output equals 1 and "Негативный" otherwise;
        ``seconds`` covers preprocessing plus the forward pass, rounded to
        5 decimals.
    """
    DEVICE = 'cpu'
    model = _load_lstm_model()

    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short durations.
    start_time = time.perf_counter()
    batch = preprocess_single_string(text, seq_len=128).unsqueeze(0).to(DEVICE)
    # No gradients are needed for inference; skipping autograd bookkeeping
    # saves time and memory.
    with torch.no_grad():
        pred = model(batch).sigmoid().round().item()
    elapsed = round(time.perf_counter() - start_time, 5)

    label = "положительный" if pred == 1 else "Негативный"
    return label, elapsed
def bert_(text, model, loaded_model):
    """Classify a review from DistilBERT features and time the prediction.

    The text is encoded with the module-level ``tokenizer`` (truncated to 64
    tokens, special tokens included), passed through ``model``, and the
    hidden vector at position 0 of the first output is handed to
    ``loaded_model.predict``.

    Args:
        text: The review text to classify.
        model: The encoder called on the token ids (the page passes the
            loaded ``DistilBertModel``).
        loaded_model: Classifier exposing ``predict`` over the extracted
            feature vectors.

    Returns:
        A ``(label, seconds)`` tuple. ``label`` is "положительный" when the
        prediction equals 1 and "Негативный" otherwise; ``seconds`` covers
        tokenization, the forward pass and the classifier call, rounded to
        5 decimals.
    """
    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short durations; time.time() is an adjustable wall-clock.
    start_time = time.perf_counter()

    tokenized_text = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=64)
    # Add a leading batch dimension: the model is called on a batch of one.
    input_ids = torch.tensor(tokenized_text).unsqueeze(0)
    with torch.no_grad():
        outputs = model(input_ids)
    # Position 0 of every sequence in the first output — presumably the
    # [CLS] embedding, since special tokens are added; confirm if the
    # tokenizer changes.
    vectors = outputs[0][:, 0, :].detach().cpu().numpy()
    prediction = loaded_model.predict(vectors).item()

    elapsed = round(time.perf_counter() - start_time, 5)

    label = "положительный" if prediction == 1 else "Негативный"
    return label, elapsed
# (model name, F1-score) pairs shown in the summary table; scores are kept
# as strings so they are displayed exactly as written.
_F1_ROWS = (
    ('Tf-IDF + logreg', '0.91'),
    ('LSTM', '0.94'),
    ('Bert', '0.74'),
)

# Column-oriented view of the rows above, in the same order.
table_f1 = {
    'Model': [model_name for model_name, _ in _F1_ROWS],
    'F1-score': [score for _, score in _F1_ROWS],
}
df = pd.DataFrame(table_f1)
def _render_result(heading, verdict, elapsed):
    """Write one model's section: heading, predicted label and timing."""
    st.markdown(heading)
    st.write('Отзыв:', verdict)
    st.write('Время:', elapsed)


text = st.text_input("Напишите отзыв")

# Run the three classifiers only once the user has typed something; each
# model is evaluated and rendered before the next one starts.
if text:
    rate, time_ = logreg(text)
    _render_result('### Tf-IDF + logreg', rate, time_)

    rate_lstm, time_lstm = lstm_(text)
    _render_result('### LSTM', rate_lstm, time_lstm)

    rate_bert, time_bert = bert_(text, model_BERT, loaded)
    _render_result('### BERT', rate_bert, time_bert)

# NOTE(review): the original indentation was lost; the F1 table is assumed
# to be rendered unconditionally (outside the `if`) — confirm.
st.table(df)