---
license: mit
language:
- en
pipeline_tag: token-classification
---
|
|
|
|
|
## Usage

```python
|
|
|
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Hub repository that hosts both the fine-tuned weights and the tokenizer;
# kept in one place so the two loads below cannot drift apart.
MODEL_ID = 'Sinanmz/toxicity_token_classifier'

# Load the token-classification model and its matching tokenizer once at
# module level so every call to the helper below reuses them.
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
|
def test_model(text):
    """Label every word of ``text`` with the model's predicted toxicity class.

    The text is tokenized, passed through the token-classification model, and
    sub-word pieces that belong to the same word are merged back into a single
    entry.

    Args:
        text: The input string to classify.

    Returns:
        A list of ``(word, label)`` tuples in input order. ``label`` is looked
        up in ``model.config.id2label``. A word that the tokenizer split into
        several pieces carries the label predicted for its first piece.
    """
    inputs = tokenizer(text, return_tensors='pt')
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class per token, for the only sequence in the batch.
    # Converting to plain ints keeps the id2label lookup straightforward.
    label_ids = logits.argmax(dim=-1)[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    # Computed once: maps each token position to the index of its source word
    # (None for tokens the tokenizer added itself, per the Transformers docs).
    word_ids = inputs.word_ids()

    result = []
    previous_word_id = None
    for token, label_id, word_id in zip(tokens, label_ids, word_ids):
        if word_id is None:
            # Added special token (e.g. sequence start/end): not part of the text.
            previous_word_id = None
            continue
        if result and word_id == previous_word_id:
            # Continuation piece: extend the word accumulated so far rather
            # than only the previous piece, so 3+ piece words merge correctly.
            word, label = result[-1]
            piece = token[2:] if token.startswith("##") else token
            result[-1] = (word + piece, label)
        else:
            result.append((token, model.config.id2label[label_id]))
        previous_word_id = word_id
    return result
|
|
|
|
|
# Example invocations. The expected outputs below are written in the shape the
# helper actually returns: a list of (token, label) tuples in input order.
text1 = 'Your face is disgusting.'
print("Result:", test_model(text1))
# output:
# Result: [('your', 'none'), ('face', 'none'), ('is', 'none'), ('disgusting', 'other toxicity'), ('.', 'none')]


text2 = 'What an ugly person you are.'
print("Result:", test_model(text2))
# output:
# Result: [('what', 'none'), ('an', 'none'), ('ugly', 'insult'), ('person', 'none'), ('you', 'none'), ('are', 'none'), ('.', 'none')]


text3 = 'Nice to meet you, sir.'
print("Result:", test_model(text3))
# output:
# Result: [('nice', 'none'), ('to', 'none'), ('meet', 'none'), ('you', 'none'), (',', 'none'), ('sir', 'none'), ('.', 'none')]
|
|
|
```