RaTE-NER-Deberta

This model is a fine-tuned version of DeBERTa on the RaTE-NER dataset.

Model description

This model is trained to serve the RaTEScore metric. If you are interested in our pipeline, please refer to our paper and GitHub repository.

This model can also be used to extract Abnormality, Non-Abnormality, Anatomy, Disease, and Non-Disease entities from medical radiology reports.

Usage

Click to expand the usage of this model.

from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
def post_process(tokenized_text, predicted_entities, tokenizer):
    """Convert token-level BIO predictions into (entity text, entity type) pairs.

    Spans are decided with a one-label look-ahead: a token's label is
    interpreted together with the label that follows it.

    * ``B-`` followed by ``I-`` opens a multi-token entity.
    * ``B-`` followed by ``B-`` or ``O`` is a single-token entity.
    * ``I-`` followed by ``B-`` or ``O`` closes the currently open entity
      (an ``I-`` run that was never opened by a ``B-`` is discarded).
    * An entity that is still open when the tokens run out is closed at the
      last non-special token.

    Args:
        tokenized_text: Tokens of one sequence. The markers "[CLS]" and
            "[SEP]" are skipped and never become part of an entity.
        predicted_entities: One label string per token ("B-<TYPE>",
            "I-<TYPE>" or "O"). It may be longer than ``tokenized_text``;
            the extra labels are only consulted as look-ahead.
        tokenizer: Object exposing ``convert_tokens_to_string(tokens)``,
            used to turn each token span back into text.

    Returns:
        A list of ``(entity_string, entity_type)`` tuples in order of
        appearance. The entity type is taken from the opening ``B-`` label.
    """
    special_tokens = ("[CLS]", "[SEP]")
    last_label_index = len(predicted_entities) - 1

    entity_spans = []
    open_start = None   # index of the B- token of the entity being built
    open_type = None    # type of the entity being built
    last_real = None    # index of the most recent non-special token

    for i, (token, label) in enumerate(zip(tokenized_text, predicted_entities)):
        if token in special_tokens:
            continue
        last_real = i
        # The final label has no look-ahead, so it cannot be classified here.
        if label == "O" or i >= last_label_index:
            continue

        is_begin = label.startswith("B-")
        if not is_begin and not label.startswith("I-"):
            continue

        next_label = predicted_entities[i + 1]
        if next_label.startswith("I-"):
            # The entity continues; only a B- token can open it.
            if is_begin:
                open_start, open_type = i, label[2:]
        elif next_label.startswith(("B-", "O")):
            # The entity ends on this token.
            if is_begin:
                entity_spans.append((i, i, label[2:]))
            elif open_start is not None:
                entity_spans.append((open_start, i, open_type))
            # Always clear the state so nothing stale leaks into later spans
            # or into the end-of-sequence check below.
            open_start = open_type = None

    # An entity that runs up to the end of the sequence is never closed by
    # the look-ahead rules above, so close it at the last real token.
    if open_start is not None:
        entity_spans.append((open_start, last_real, open_type))

    return [
        (tokenizer.convert_tokens_to_string(tokenized_text[start:end + 1]), entity_type)
        for start, end, entity_type in entity_spans
    ]

def run_ner(texts, idx2label, tokenizer, model, device):
    """Run token classification on a batch of texts and collect the entities.

    Args:
        texts: Batch of input strings (typically one sentence each).
        idx2label: Mapping from predicted class index to BIO label string.
        tokenizer: Tokenizer used to encode ``texts`` and decode token ids.
        model: Token-classification model whose output exposes ``logits``.
        device: Device the encoded inputs are moved to before the forward
            pass.

    Returns:
        A single flat list of ``(entity_string, entity_type)`` tuples for
        all inputs, in input order.
    """
    # Inputs longer than 512 tokens are truncated; shorter ones are padded so
    # the batch forms one tensor.
    inputs = tokenizer(
        texts,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
    predicted_labels = torch.argmax(outputs.logits, dim=2).tolist()

    save_pairs = []
    for input_ids, label_ids in zip(inputs["input_ids"], predicted_labels):
        predicted_entities = [idx2label[label] for label in label_ids]
        # Drop padding before decoding tokens.
        # NOTE(review): slicing the first N ids assumes the tokenizer pads on
        # the right -- confirm for the tokenizer in use.
        non_pad_length = (input_ids != tokenizer.pad_token_id).sum().item()
        tokenized_text = tokenizer.convert_ids_to_tokens(input_ids[:non_pad_length])
        save_pairs.extend(post_process(tokenized_text, predicted_entities, tokenizer))
    return save_pairs

# BIO label vocabulary. The position of each label in this list is the class
# index it is decoded from.
# NOTE(review): the order presumably has to match the label ids the checkpoint
# was trained with -- confirm against the model config.
ner_labels = [
    'B-ABNORMALITY',
    'I-ABNORMALITY',
    'B-NON-ABNORMALITY',
    'I-NON-ABNORMALITY',
    'B-DISEASE',
    'I-DISEASE',
    'B-NON-DISEASE',
    'I-NON-DISEASE',
    'B-ANATOMY',
    'I-ANATOMY',
    'O',
]
idx2label = dict(enumerate(ner_labels))

# Load the tokenizer and the fine-tuned token-classification model.
tokenizer = AutoTokenizer.from_pretrained('Angelakeke/RaTE-NER-Deberta')
model = AutoModelForTokenClassification.from_pretrained('Angelakeke/RaTE-NER-Deberta')

# Use the GPU when one is available and switch the model to inference mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

We recommend running inference sentence by sentence.

# Put the radiology report to analyse here.
text = ""

# Split the report on '. ' so that each sentence is tagged as its own input,
# then collect the (entity_string, entity_type) pairs for all of them.
texts = text.split('. ')
save_pair = run_ner(texts, idx2label, tokenizer, model, device)

Author

Author: Weike Zhao

If you have any questions, please feel free to contact zwk0629@sjtu.edu.cn.

Citation

@inproceedings{zhao2024ratescore,
  title={RaTEScore: A Metric for Radiology Report Generation},
  author={Zhao, Weike and Wu, Chaoyi and Zhang, Xiaoman and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  pages={15004--15019},
  year={2024}
}
Downloads last month
8,084
Safetensors
Model size
184M params
Tensor type
F32
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.