File size: 3,808 Bytes
85291bb
 
 
 
 
 
 
 
 
 
 
 
2600008
85291bb
 
2600008
 
 
85291bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fa41f7
85291bb
 
 
5fa41f7
85291bb
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
from spacy import displacy 
from transformers import (AutoModelForTokenClassification, 
                          AutoTokenizer, 
                          pipeline,
                          )

# Hugging Face Hub checkpoint used for both the model weights and the tokenizer.
model_checkpoint = "jsylee/scibert_scivocab_uncased-finetuned-ner"

# Label ids 1/2 and 3/4 deliberately share a name so that the BIO tags of one
# entity type are grouped back together by the pipeline.
_ID2LABEL = {
    0: "O",
    1: "DRUG",
    2: "DRUG",
    3: "ADVERSE EFFECT",
    4: "ADVERSE EFFECT",
}

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(_ID2LABEL),
    id2label=_ID2LABEL,
)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Inference runs on CPU only: the model is placed on "cpu" and the pipeline
# is given device=-1.
model.to("cpu")

# NOTE(review): `grouped_entities=True` is flagged as deprecated by newer
# transformers releases in favour of `aggregation_strategy` -- confirm the
# pinned version before migrating.
model_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    device=-1,
    grouped_entities=True,
)

def extract_entities(sentence):
    """Extract drug and adverse-effect entities and render them with displaCy.

    Runs the module-level ``model_pipeline`` on ``sentence``, keeps every
    grouped entity whose label is not the "O" (outside) tag, and hands the
    resulting spans to displaCy's manual entity visualizer.

    source: https://github.com/jsylee/personal-projects/blob/master/Hugging%20Face%20ADR%20Fine-Tuning/SciBERT%20ADR%20Fine-Tuning.ipynb

    Args:
        sentence: Raw input text to annotate.

    Returns:
        Whatever ``displacy.render`` returns for the annotated text; the
        Gradio interface in this file consumes it as HTML.
    """
    entities = [
        # Copy each span instead of mutating the pipeline's own result dict;
        # displaCy reads the entity name from the "label" key.
        {**span, "label": span["entity_group"]}
        for span in model_pipeline(sentence)
        # The outside tag is the letter "O" (see id2label), not the digit "0".
        # The pipeline is expected to drop "O" groups by default, so this is
        # only a safeguard.
        if span["entity_group"] != "O"
    ]

    # displaCy's manual mode takes a list of pre-annotated documents.
    params = [{"text": sentence,
               "ents": entities,
               "title": None}]

    return displacy.render(params, style="ent", manual=True, options={
        "colors": {
                   "DRUG": "#f08080",
                   "ADVERSE EFFECT": "#9bddff",
               },
    })

# Example inputs passed to the Gradio interface via its `examples` argument.
# These descriptions of adverse effects are taken from Wikipedia:
# https://en.wikipedia.org/wiki/Adverse_effect#Medications

examples = [
    "Abortion, miscarriage or uterine hemorrhage associated with misoprostol (Cytotec), a labor-inducing drug.",
    "Addiction to many sedatives and analgesics, such as diazepam, morphine, etc.",
    "Birth defects associated with thalidomide",
    "Bleeding of the intestine associated with aspirin therapy",
    "Cardiovascular disease associated with COX-2 inhibitors (i.e. Vioxx)",
    "Deafness and kidney failure associated with gentamicin (an antibiotic)",
    "Death, following sedation, in children using propofol (Diprivan)",
    "Depression or hepatic injury caused by interferon",
    "Diabetes caused by atypical antipsychotic medications (neuroleptic psychiatric drugs)"
]

# Explanatory text passed to the Gradio interface as its `article` argument.
# It describes what the app does and links to the model, the dataset, the
# fine-tuning notebook and the author. This is a runtime string: edit with care.
footer = """
<hr>
This app automatically extracts drug names and adverse effects from the input text. An adverse effect occurs when a drug harms a patient in any way.

The extraction is done by a <a target="_blank" rel="noopener noreferrer" href=https://huggingface.co/jsylee/scibert_scivocab_uncased-finetuned-ner>SciBERT model</a> fine-tuned on the <a target="_blank" rel="noopener noreferrer" href=https://huggingface.co/datasets/ade_corpus_v2>`ade_corpus_v2`</a> dataset. Fine-tuning code <a target="_blank" rel="noopener noreferrer" href=https://github.com/jsylee/personal-projects/blob/master/Hugging%20Face%20ADR%20Fine-Tuning/SciBERT%20ADR%20Fine-Tuning.ipynb>here</a>.

This was made during the November 2021 Hugging Face Community Event.

By <a target="_blank" rel="noopener noreferrer" href=http://www.columbia.edu/~jsl2239/>Justin S. Lee</a>
"""

# Five-line text input whose placeholder hints at the expected kind of sentence.
# NOTE(review): `gr.inputs.Textbox` is the legacy component namespace; newer
# Gradio releases expose `gr.Textbox` -- confirm the pinned version before
# migrating.
_sentence_box = gr.inputs.Textbox(
    lines=5,
    placeholder="Abortion, miscarriage or uterine hemorrhage associated with misoprostol...",
)

# Wire the extractor into a web UI: free text in, rendered entity HTML out.
iface = gr.Interface(
    fn=extract_entities,
    inputs=_sentence_box,
    outputs="html",
    examples=examples,
    title="NER for Drug Names and Adverse Effects",
    article=footer,
)
iface.launch()