cointegrated committed on
Commit
e216ec5
1 Parent(s): a6afaee

the first commit

Files changed (2)
  1. app.py +39 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,39 @@
+ import pandas as pd
+ import streamlit as st
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+ model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
+ if torch.cuda.is_available():
+     model.cuda()
+
+
+ def text2toxicity(text, aggregate=True):
+     """ Calculate toxicity of a text (if aggregate=True) or a vector of toxicity aspects (if aggregate=False)"""
+     with torch.no_grad():
+         inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
+         proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()
+     if isinstance(text, str):
+         proba = proba[0]
+     if aggregate:
+         return 1 - proba.T[0] * (1 - proba.T[-1])
+     return proba
+
+
+ text = st.text_area('Введите текст')
+ proba = text2toxicity(text, aggregate=False)
+ s = pd.Series(
+     proba.tolist() + [proba[0] * (1 - proba[-1])],
+     index=[
+         'Стиль НЕтоксичный',
+         'Есть оскорбление',
+         'Есть непотребство',
+         'Есть угроза',
+         'Смысл текста неприемлемый',
+         'Текст - ОК'
+     ],
+     name='Оценка вероятности'
+ )
+ s
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ transformers
+ sentencepiece
+ pandas
+ streamlit