File size: 1,255 Bytes
019c64d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from functools import lru_cache
from time import time

import joblib
import streamlit as st
import torch
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModel

# Human-readable (Russian) names for the integer class ids returned by the
# classifier. Deliberately NOT called `dict`, so the builtin stays usable.
LABEL_NAMES = {0: 'Нейтральный', 1: 'Положительный', 2: 'Отрицательный'}

_ENCODER_CHECKPOINT = "cointegrated/rubert-tiny2"
_CLASSIFIER_PATH = 'model/lr_weights.pkl'


@lru_cache(maxsize=1)
def _load_encoder():
    """Load the tokenizer and encoder once; later calls reuse the same pair."""
    tokenizer = AutoTokenizer.from_pretrained(_ENCODER_CHECKPOINT)
    model = AutoModel.from_pretrained(_ENCODER_CHECKPOINT)
    return tokenizer, model


@lru_cache(maxsize=1)
def _load_classifier():
    """Load the fitted classifier from disk once; later calls reuse it."""
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only ship/load a weights file from a trusted source.
    return joblib.load(_CLASSIFIER_PATH)


def preprocess_bert(text):
    """Classify the sentiment of ``text`` and report how long it took.

    The text is tokenized and passed through the encoder, the hidden state
    at the first token position is taken as the embedding and L2-normalized,
    and the stored classifier predicts a class id that is mapped to a label
    through ``LABEL_NAMES``.

    The tokenizer, encoder and classifier are loaded on the first call and
    cached afterwards, so only the first call's reported time includes the
    one-time loading cost.

    Args:
        text: Input passed straight to the tokenizer. If a batch of texts is
            given, only the prediction for the first item is reported.

    Returns:
        A Markdown-formatted string with the predicted label and the elapsed
        wall-clock time in seconds.

    Raises:
        KeyError: If the classifier predicts a class id that is not a key of
            ``LABEL_NAMES``.
    """
    start_time = time()

    tokenizer, model = _load_encoder()
    classifier = _load_classifier()

    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        # Move every input tensor to wherever the model lives before the pass.
        model_output = model(**{k: v.to(model.device) for k, v in encoded.items()})
        # First token position of every sequence serves as the text embedding.
        embeddings = model_output.last_hidden_state[:, 0, :]
        embeddings = torch.nn.functional.normalize(embeddings)
    embeddings = embeddings.detach().cpu().numpy()

    predicted_label = classifier.predict(embeddings)
    # predict() returns an array; convert the first entry to a plain int key.
    predicted_label_text = LABEL_NAMES[int(predicted_label[0])]

    inference_time = time() - start_time
    return f"***{predicted_label_text}***, время предсказания: ***{inference_time:.4f} сек***."