Spaces:

DmitriyMineev
/

cpu

Sleeping

cpu / pages /toxic.py

5ba7acb 3 months ago

1.96 kB

	import streamlit as st
	import torch
	import torch.nn as nn
	import transformers

	# Size of the classifier's output layer.
	NUMBER_OF_CLASSES = 2

	_PRETRAINED_NAME = "cointegrated/rubert-tiny-toxicity"


	@st.cache_resource
	def _load_pretrained():
	    """Load the tokenizer and the BERT encoder once per server process.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, both ``from_pretrained`` calls would rebuild their objects on
	    each rerun.

	    Returns:
	        A ``(tokenizer, encoder)`` tuple.
	    """
	    tok = transformers.BertTokenizer.from_pretrained(_PRETRAINED_NAME)
	    encoder = transformers.BertModel.from_pretrained(_PRETRAINED_NAME)
	    return tok, encoder


	# Module-level names are kept because the rest of the script refers to them.
	tokenizer, bert_model = _load_pretrained()

	# Classifier definition; fine-tuned weights are loaded separately.
	class toxicBERT(nn.Module):
	    """BERT encoder followed by a small feed-forward classification head.

	    The encoder is the module-level ``bert_model`` instance. The head maps a
	    312-dimensional vector to ``NUMBER_OF_CLASSES`` outputs
	    (312 is assumed to be the encoder's hidden size -- TODO confirm).
	    """

	    def __init__(self):
	        super().__init__()
	        # Reuse the already-instantiated encoder instead of building a new one.
	        self.bert = bert_model
	        head_layers = [
	            nn.Linear(312, 256),
	            nn.Sigmoid(),
	            nn.Linear(256, NUMBER_OF_CLASSES),
	        ]
	        self.linear = nn.Sequential(*head_layers)

	    def forward(self, x):
	        """Return the head's raw outputs for one batch (no softmax applied).

	        Args:
	            x: Indexable pair; ``x[0]`` is passed to the encoder as its first
	                positional argument and ``x[1]`` as ``attention_mask``.

	        Returns:
	            The output of the classification head.
	        """
	        token_ids = x[0]
	        mask = x[1]
	        encoded = self.bert(token_ids, attention_mask=mask)
	        # Keep only position 0 along the second axis of the hidden states.
	        first_token = encoded.last_hidden_state[:, 0, :]
	        return self.linear(nn.functional.normalize(first_token))

	# Build the classifier and load its saved state dict.
	@st.cache_resource
	def load_model():
	    """Create a ``toxicBERT``, load its weights from disk and return it.

	    The result is cached with ``st.cache_resource``, so the checkpoint is
	    read only on the first call.

	    Returns:
	        The model with the checkpoint applied, switched to eval mode.

	    Raises:
	        FileNotFoundError: If ``models/toxicBERT.pt`` does not exist relative
	            to the current working directory.
	        RuntimeError: If the checkpoint keys or shapes do not match the model.
	    """
	    model = toxicBERT()
	    # Only a state dict is expected here, so restrict unpickling to tensors
	    # and plain containers instead of arbitrary Python objects.
	    state_dict = torch.load(
	        "models/toxicBERT.pt",
	        map_location="cpu",
	        weights_only=True,
	    )
	    model.load_state_dict(state_dict)
	    model.eval()
	    return model

	# Streamlit UI: read a comment and display the model's toxicity score.
	st.title("Оценка токсичности комментария (на русском)")
	text = st.text_area("Введите комментарий:")

	# Run inference only when the button was clicked and the input is not blank.
	if st.button("Проверить токсичность") and text.strip():
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
	    input_ids = inputs["input_ids"]
	    attention_mask = inputs["attention_mask"]

	    model = load_model()
	    with torch.no_grad():
	        output = model((input_ids, attention_mask))
	        probs = torch.softmax(output, dim=1)
	        # Class index 1 is the one reported to the user as toxicity.
	        score = probs[0][1].item()

	    st.write(f"Токсичность: {score:.2f}")
	    st.progress(score)