import streamlit as st
import torch
import torch.nn as nn
import transformers

NUMBER_OF_CLASSES = 2

# Load the tokenizer and the base BERT encoder from the Hugging Face Hub
tokenizer = transformers.BertTokenizer.from_pretrained("cointegrated/rubert-tiny-toxicity")
bert_model = transformers.BertModel.from_pretrained("cointegrated/rubert-tiny-toxicity")


# BERT encoder with a small classification head on top;
# the fine-tuned weights are loaded later from a local state_dict
class toxicBERT(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = bert_model
        # classification head: 312 is the hidden size of rubert-tiny
        self.linear = nn.Sequential(
            nn.Linear(312, 256),
            nn.Sigmoid(),
            nn.Linear(256, NUMBER_OF_CLASSES)
        )

    def forward(self, x):
        # x is a tuple of (input_ids, attention_mask)
        bert_out = self.bert(x[0], attention_mask=x[1])
        # classify the L2-normalized [CLS] token embedding
        normed_bert_out = nn.functional.normalize(bert_out.last_hidden_state[:, 0, :])
        out = self.linear(normed_bert_out)
        return out


# Build the model and load the fine-tuned weights;
# st.cache_resource keeps the loaded model in memory across Streamlit reruns
@st.cache_resource
def load_model():
    model = toxicBERT()
    state_dict = torch.load("models/toxicBERT.pt", map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    return model
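
# The checkpoint at "models/toxicBERT.pt" is expected to hold a plain state_dict of toxicBERT,
# e.g. one saved after fine-tuning with torch.save(model.state_dict(), "models/toxicBERT.pt").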


# Streamlit interface (UI labels are in Russian)
st.title("Оценка токсичности комментария (на русском)")  # "Toxicity rating for a comment (in Russian)"
text = st.text_area("Введите комментарий:")  # "Enter a comment:"

if st.button("Проверить токсичность") and text.strip():  # "Check toxicity"
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]
    model = load_model()
    with torch.no_grad():
        output = model((input_ids, attention_mask))
    probs = torch.softmax(output, dim=1)
    score = probs[0][1].item()  # probability of the "toxic" class
    st.write(f"Токсичность: {score:.2f}")  # "Toxicity: <score>"
    st.progress(score)
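
# How to run locally, a minimal sketch assuming this file is saved as app.py
# (the default entry point for a Streamlit Space) and that models/toxicBERT.pt is present:
#   pip install streamlit torch transformers
#   streamlit run app.py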