File size: 1,662 Bytes
1350bf8
 
b391071
 
70abcb5
 
b391071
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
---
license: mit
language:
- en
pipeline_tag: token-classification
---


## Usage

```python

import numpy as np
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer


# Download (on first use) and load the fine-tuned token-classification head
# and its matching tokenizer from the Hugging Face Hub.
model = AutoModelForTokenClassification.from_pretrained('Sinanmz/toxicity_token_classifier')
tokenizer = AutoTokenizer.from_pretrained('Sinanmz/toxicity_token_classifier')

def test_model(text):
    """Classify each word of *text* for toxicity.

    Runs the token-classification model, drops the special tokens, and
    merges WordPiece continuation pieces ("##...") back into whole words.

    Returns:
        list[tuple[str, str]]: one ``(word, label)`` pair per word, where
        the label is the one predicted for the word's first sub-token.
    """
    inputs = tokenizer(text, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # Argmax over the label dimension in torch directly — the original used
    # np.argmax without ever importing numpy.
    predictions = logits.argmax(dim=-1)
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = predictions[0].tolist()
    # Strip the [CLS]/[SEP] special tokens at both ends.
    labels = labels[1:-1]
    tokens = tokens[1:-1]
    # word_ids() is indexed over the *untrimmed* sequence, hence the +1 offset
    # when comparing against our trimmed position i.
    word_ids = inputs.word_ids()
    result = []
    for i in range(len(labels)):
        if i > 0 and word_ids[i + 1] == word_ids[i]:
            # Continuation sub-token: fold it into the accumulated word.
            # list.pop() (the original called dict's popitem()), and merge onto
            # the popped entry so words of 3+ pieces keep all their pieces and
            # the label of their first sub-token.
            prev_word, prev_label = result.pop()
            result.append((prev_word + tokens[i][2:], prev_label))
        else:
            result.append((tokens[i], model.config.id2label[labels[i]]))
    return result


# NOTE(review): test_model returns a list of (word, label) tuples, not a dict;
# the expected outputs below are shown in that list form.
text1 = 'Your face is disgusting.'
print("Result:", test_model(text1))
# output: 
# Result: [('your', 'none'), ('face', 'none'), ('is', 'none'), ('disgusting', 'other toxicity'), ('.', 'none')]


text2 = 'What an ugly person you are.'
print("Result:", test_model(text2))
# output: 
# Result: [('what', 'none'), ('an', 'none'), ('ugly', 'insult'), ('person', 'none'), ('you', 'none'), ('are', 'none'), ('.', 'none')]


text3 = 'Nice to meet you, sir.'
print("Result:", test_model(text3))
# output: 
# Result: [('nice', 'none'), ('to', 'none'), ('meet', 'none'), ('you', 'none'), (',', 'none'), ('sir', 'none'), ('.', 'none')]

```