---
library_name: transformers

tags:

- roberta

datasets:

- pubmed

language:

- en

---
|
# Model Card for MDDDDR/roberta_large_NER
|
|
|
base_model : [FacebookAI/xlm-roberta-large](https://huggingface.co/FacebookAI/xlm-roberta-large)



hidden_size : 1024



max_position_embeddings : 512



num_attention_heads : 16



num_hidden_layers : 24



vocab_size : 250002
|
|
|
# Basic usage
|
|
|
```python
|
from transformers import AutoTokenizer, AutoModelForTokenClassification

import numpy as np



# Map predicted class ids to BIO tags (presumably "MT" = medical term — confirm with model author).
id2tag = {0: 'O', 1: 'B_MT', 2: 'I_MT'}



# Load model & tokenizer from the Hub.
MODEL_NAME = 'MDDDDR/roberta_large_NER'



model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)



# Prepare input.
text = 'mental disorder can also contribute to the development of diabetes through various mechanism including increased stress, poor self care behavior, and adverse effect on glucose metabolism.'

tokenized = tokenizer(text, return_tensors='pt')



# Forward pass.
output = model(**tokenized)



# Per-token class ids; `output.logits` is the explicit name for `output[0]`.
# [1:-1] drops the <s>/</s> special tokens the tokenizer adds around the text.
preds = np.argmax(output.logits.cpu().detach().numpy(), axis=2)[0][1:-1]



# Print predicted tag next to each subword token.
# (Renamed loop variables so the prediction array is not shadowed.)
for txt, tag_id in zip(tokenizer.tokenize(text), preds):

    print("{}\t{}".format(id2tag[tag_id], txt))

# B_MT ▁mental

# B_MT ▁disorder
|
```