cointegrated committed on
Commit
e216ec5
1 Parent(s): a6afaee

the first commit

Files changed (2)
  1. app.py +39 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,39 @@
+ import pandas as pd
+ import streamlit as st
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+ model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
+ if torch.cuda.is_available():
+     model.cuda()
+
+
+ def text2toxicity(text, aggregate=True):
+     """ Calculate toxicity of a text (if aggregate=True) or a vector of toxicity aspects (if aggregate=False)"""
+     with torch.no_grad():
+         inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
+         proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()
+     if isinstance(text, str):
+         proba = proba[0]
+     if aggregate:
+         return 1 - proba.T[0] * (1 - proba.T[-1])
+     return proba
+
+
+ text = st.text_area('Введите текст')
+ proba = text2toxicity(text, aggregate=False)
+ s = pd.Series(
+     proba.tolist() + [proba[0] * (1 - proba[-1])],
+     index=[
+         'Стиль НЕтоксичный',
+         'Есть оскорбление',
+         'Есть непотребство',
+         'Есть угроза',
+         'Смысл текста неприемлемый',
+         'Текст - ОК'
+     ],
+     name='Оценка вероятности'
+ )
+ s
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ transformers
+ sentencepiece
+ pandas
+ streamlit