|
|
|
"""Example: token-level NER with RoBERTa.

Runs the pretrained ``Jean-Baptiste/roberta-large-ner-english`` model on a
sample sentence, checks the predicted per-token entity labels, then reuses
those predictions as labels to compute a (near-zero) token-classification loss.

NOTE: downloads model weights from the Hugging Face Hub on first run.
"""

from transformers import RobertaTokenizer, RobertaForTokenClassification

import torch

tokenizer = RobertaTokenizer.from_pretrained("Jean-Baptiste/roberta-large-ner-english")

model = RobertaForTokenClassification.from_pretrained("Jean-Baptiste/roberta-large-ner-english")

# add_special_tokens=False keeps <s>/</s> out so every logit row corresponds
# to a real word-piece of the input sentence.
inputs = tokenizer("HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt")

# Inference only — disable autograd to save memory and compute.
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (batch, seq_len, num_labels)

# Highest-scoring label id per token.
predicted_token_class_ids = logits.argmax(-1)

# Map label ids to human-readable tags for the first (only) sequence.
# Note: word-piece tokenization means there may be more tokens than words.
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]

assert predicted_tokens_classes == ['O', 'ORG', 'ORG', 'O', 'O', 'O', 'O', 'O', 'LOC', 'O', 'LOC', 'LOC']

# Feeding the model's own predictions back as labels should yield a small loss.
labels = predicted_token_class_ids

loss = model(**inputs, labels=labels).loss

rounded_loss = round(loss.item(), 2)
print(f"loss: {rounded_loss}")
|
|