import streamlit as st
import torch
import torch.nn as nn
import transformers

# Width of the classifier's final layer (number of output logits).
NUMBER_OF_CLASSES = 2


@st.cache_resource
def _load_pretrained():
    """Load the tokenizer and the BERT encoder once and cache them.

    Streamlit re-executes this script on every user interaction; without
    caching, both ``from_pretrained`` calls would run again on each rerun.

    Returns:
        A ``(tokenizer, bert_model)`` tuple built from the same checkpoint.
    """
    checkpoint = "cointegrated/rubert-tiny-toxicity"
    return (
        transformers.BertTokenizer.from_pretrained(checkpoint),
        transformers.BertModel.from_pretrained(checkpoint),
    )


# Module-level names used by the rest of the script.
tokenizer, bert_model = _load_pretrained()

# Classifier: the module-level pretrained BERT encoder plus a small MLP head.
# The encoder is reused as-is (with its loaded weights), not re-created.
class toxicBERT(nn.Module):
    """BERT encoder followed by a two-layer classification head.

    The head receives the L2-normalized hidden state of the first token and
    produces ``NUMBER_OF_CLASSES`` raw logits.
    """

    def __init__(self):
        super().__init__()
        # Shared reference to the module-level encoder; no copy is made.
        self.bert = bert_model
        # Read the head's input width from the encoder config instead of
        # hard-coding it, so the head always matches the encoder in use.
        hidden_size = self.bert.config.hidden_size
        # The Sequential layout (Linear at indices 0 and 2) is kept unchanged
        # so parameter names in saved state dicts still match.
        self.linear = nn.Sequential(
            nn.Linear(hidden_size, 256),
            nn.Sigmoid(),
            nn.Linear(256, NUMBER_OF_CLASSES),
        )

    def forward(self, x):
        """Compute class logits for a tokenized batch.

        Args:
            x: Indexable pair where ``x[0]`` is passed to the encoder as the
                input ids and ``x[1]`` as the attention mask.

        Returns:
            The head's output: one row of ``NUMBER_OF_CLASSES`` logits per
            input row (no softmax applied).
        """
        bert_out = self.bert(x[0], attention_mask=x[1])
        # Hidden state of the first token of every sequence.
        first_token = bert_out.last_hidden_state[:, 0, :]
        # L2-normalize each row before the head (explicit defaults).
        normed = nn.functional.normalize(first_token, p=2.0, dim=1)
        return self.linear(normed)

# Build the classifier and load its saved state dict (cached by Streamlit).
@st.cache_resource
def load_model():
    """Create a ``toxicBERT`` instance with weights from ``models/toxicBERT.pt``.

    Returns:
        The model in evaluation mode, with tensors mapped to the CPU.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        file is missing or its keys do not match the model.
    """
    model = toxicBERT()
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state dict needs, and avoids executing arbitrary
    # pickled code from the checkpoint file.
    state_dict = torch.load(
        "models/toxicBERT.pt", map_location="cpu", weights_only=True
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Streamlit interface: read a comment, run the classifier on button press and
# show the probability of class index 1 as the toxicity score.
st.title("Оценка токсичности комментария (на русском)")
text = st.text_area("Введите комментарий:")

if st.button("Проверить токсичность"):
    if not text.strip():
        # Explain why nothing was computed instead of silently ignoring
        # the click on empty / whitespace-only input.
        st.warning("Введите текст комментария.")
    else:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )

        model = load_model()
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model((inputs["input_ids"], inputs["attention_mask"]))
        # Softmax over the class dimension; first row, class index 1.
        score = torch.softmax(logits, dim=1)[0, 1].item()

        st.write(f"Токсичность: {score:.2f}")
        st.progress(score)