# Hugging Face Spaces status at capture time: Sleeping
import numpy as np | |
import torch | |
from transformers import AutoTokenizer, AutoModel | |
from sklearn.linear_model import LogisticRegression | |
import streamlit as st | |
import pickle | |
import streamlit as st | |
# Hugging Face Hub checkpoint used for both the encoder and its tokenizer.
_RUBERT_CHECKPOINT = "cointegrated/rubert-tiny2"

# Filled on the first get_model() call so later calls reuse the loaded objects
# instead of re-instantiating the model and tokenizer every time.
_MODEL_AND_TOKENIZER = None


def get_model():
    """Return the rubert-tiny2 encoder and its tokenizer, loading them once.

    The first call instantiates both objects with ``from_pretrained``; every
    later call in the same process returns the same pair, so callers that
    invoke this per prediction do not pay the loading cost again.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    global _MODEL_AND_TOKENIZER
    if _MODEL_AND_TOKENIZER is None:
        model = AutoModel.from_pretrained(_RUBERT_CHECKPOINT)
        tokenizer = AutoTokenizer.from_pretrained(_RUBERT_CHECKPOINT)
        _MODEL_AND_TOKENIZER = (model, tokenizer)
    return _MODEL_AND_TOKENIZER
# Populated on first use so the pickle is read from disk only once per process.
_LOG_REG_CLASSIFIER = None


def _load_classifier():
    """Return the pickled classifier, reading the file only on the first call."""
    global _LOG_REG_CLASSIFIER
    if _LOG_REG_CLASSIFIER is None:
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # Only ship a pickle produced by a trusted training run; the path is
        # relative to the process working directory.
        with open('pages/film_review/model/log_reg_bert.pkl', 'rb') as f:
            _LOG_REG_CLASSIFIER = pickle.load(f)
    return _LOG_REG_CLASSIFIER


def predict_bert(input_text):
    """Classify ``input_text`` with the encoder features and a pickled classifier.

    The text is tokenized (truncated to ``MAX_LEN`` tokens including special
    tokens), right-padded to exactly ``MAX_LEN`` ids, and passed through the
    encoder returned by ``get_model()``. The hidden state at sequence
    position 0 (the ``[CLS]`` token for BERT-style tokenizers) is used as the
    feature vector for the classifier loaded from
    ``pages/film_review/model/log_reg_bert.pkl``.

    Args:
        input_text: Text to classify; passed directly to ``tokenizer.encode``.

    Returns:
        The first element of the classifier's ``predict`` output, i.e. the
        predicted label for ``input_text``.

    Raises:
        FileNotFoundError: If the pickled classifier is not found at the
            relative path above.
    """
    MAX_LEN = 300
    model, tokenizer = get_model()

    token_ids = tokenizer.encode(
        input_text,
        add_special_tokens=True,
        truncation=True,
        max_length=MAX_LEN,
    )

    # Ask the tokenizer for its padding id rather than assuming it is 0; fall
    # back to 0 (the previous hard-coded value) if the tokenizer defines none.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = 0

    # Build the mask from the real token count so it never depends on which
    # numeric id a token happens to have.
    n_real = len(token_ids)
    n_pad = MAX_LEN - n_real
    padded_ids = token_ids + [pad_id] * n_pad
    attention_mask = [1] * n_real + [0] * n_pad

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    with torch.no_grad():
        # Explicit int64 keeps the ids independent of the platform's default
        # integer width; the outer list adds the batch dimension of size 1.
        input_tensor = torch.tensor([padded_ids], dtype=torch.long, device=device)
        attention_mask_tensor = torch.tensor(
            [attention_mask], dtype=torch.long, device=device
        )
        last_hidden_states = model(
            input_tensor, attention_mask=attention_mask_tensor
        )[0]

    # Position 0 of every sequence -> one feature row per input text.
    features = last_hidden_states[:, 0, :].cpu().numpy()

    prediction = _load_classifier().predict(features)
    return prediction[0]