File size: 1,335 Bytes
d3d0074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pickle
from functools import lru_cache

import numpy as np
import streamlit as st
import torch
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModel

@st.cache_resource
def get_model():
    """Load the rubert-tiny2 encoder together with its tokenizer.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the loaded objects instead of calling ``from_pretrained`` again.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from the
        ``cointegrated/rubert-tiny2`` checkpoint.
    """
    checkpoint = "cointegrated/rubert-tiny2"
    encoder = AutoModel.from_pretrained(checkpoint)
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return encoder, text_tokenizer

@lru_cache(maxsize=1)
def _load_classifier():
    """Unpickle the classifier head once and reuse it on later calls."""
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file. This path must only ever point at a trusted artifact shipped
    # with the app, never at user-supplied data.
    with open('pages/film_review/model/log_reg_bert.pkl', 'rb') as f:
        return pickle.load(f)


def predict_bert(input_text):
    """Classify a text with the BERT encoder and the pickled classifier.

    The text is tokenized (truncated to ``MAX_LEN`` tokens, special tokens
    included), right-padded to exactly ``MAX_LEN`` ids, and passed through the
    encoder returned by ``get_model()``. The hidden state at sequence
    position 0 is used as the feature vector for the classifier loaded from
    ``pages/film_review/model/log_reg_bert.pkl``.

    Args:
        input_text: The text to classify; passed as-is to
            ``tokenizer.encode``.

    Returns:
        The first element of the classifier's ``predict`` output for this
        single input. Its meaning depends on how the pickled model was
        trained, which is not visible here.

    Raises:
        FileNotFoundError: If the classifier pickle cannot be found relative
            to the current working directory.
    """
    MAX_LEN = 300

    model, tokenizer = get_model()

    token_ids = tokenizer.encode(
        input_text,
        add_special_tokens=True,
        truncation=True,
        max_length=MAX_LEN,
    )
    n_tokens = len(token_ids)
    n_pad = MAX_LEN - n_tokens

    # Pad with the tokenizer's own pad id instead of assuming it is 0
    # (falls back to 0, the previous hard-coded value, if none is defined).
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    # The mask is built from the real token count rather than from
    # ``id != 0`` so it stays correct whatever the pad id is.
    # The outer list adds the batch dimension (batch size 1).
    input_tensor = torch.tensor([token_ids + [pad_id] * n_pad], device=device)
    attention_mask_tensor = torch.tensor(
        [[1] * n_tokens + [0] * n_pad], device=device
    )

    with torch.no_grad():
        last_hidden_states = model(
            input_tensor, attention_mask=attention_mask_tensor
        )[0]

    # Hidden state of the first token (the leading special token added by
    # ``add_special_tokens=True``) serves as the sentence-level feature.
    features = last_hidden_states[:, 0, :].cpu().numpy()

    prediction = _load_classifier().predict(features)

    return prediction[0]