jsylee committed on
Commit
85291bb
1 Parent(s): e70b43a

Add initial app files

Browse files
Files changed (2) hide show
  1. app.py +75 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from spacy import displacy
3
+ from transformers import (AutoModelForTokenClassification,
4
+ AutoTokenizer,
5
+ pipeline,
6
+ )
7
+
8
# Local import so this setup section is self-contained: torch is only needed
# here, to find out whether a CUDA device is actually present.
import torch

# Hugging Face Hub id of the fine-tuned SciBERT token-classification checkpoint.
model_checkpoint = "jsylee/scibert_scivocab_uncased-finetuned-ner"

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=5,
    # Both tags of each entity type share one name, for grouping BIO tags
    # back together when the pipeline aggregates tokens into entities.
    id2label={
        0: 'O',
        1: 'DRUG',
        2: 'DRUG',
        3: 'ADVERSE EFFECT',
        4: 'ADVERSE EFFECT',
    },
)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# The pipeline takes a device ordinal: 0 is the first CUDA GPU, -1 is the CPU.
# Hard-coding 0 fails on CPU-only hosts, so only request the GPU when one
# is available.
_pipeline_device = 0 if torch.cuda.is_available() else -1

model_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    device=_pipeline_device,
    grouped_entities=True,
)
17
+
18
def extract_entities(sentence):
    """Extract drug and reaction entities, and show using displaCy's NER visualizer.

    Runs the module-level ``model_pipeline`` on ``sentence``, copies each
    prediction's ``entity_group`` into a ``label`` key, and hands the
    resulting spans to ``displacy.render`` in manual mode.

    source: https://github.com/jsylee/personal-projects/blob/master/Hugging%20Face%20ADR%20Fine-Tuning/SciBERT%20ADR%20Fine-Tuning.ipynb

    Args:
        sentence: The raw text to tag.

    Returns:
        Whatever ``displacy.render`` returns for the "ent" style; the
        Gradio interface in this file displays it through its "html" output.
    """
    entities = []

    for prediction in model_pipeline(sentence):
        label = prediction["entity_group"]

        # The "Outside" class is named with the letter "O" in id2label, not
        # the digit "0", so compare against the letter.
        # NOTE(review): the pipeline may already drop "O" groups by default;
        # this check keeps the function correct either way.
        if label == "O":
            continue

        # Build a new dict instead of mutating the pipeline's own result.
        entities.append({**prediction, "label": label})

    params = [{"text": sentence,
               "ents": entities,
               "title": None}]

    # Fixed highlight colour for each entity type.
    options = {
        "colors": {
            "DRUG": "#f08080",
            "ADVERSE EFFECT": "#9bddff",
        },
    }

    return displacy.render(params, style="ent", manual=True, options=options)
44
+
45
# Sample inputs offered in the UI. The sentences come from Wikipedia's list
# of medication-related adverse effects:
# https://en.wikipedia.org/wiki/Adverse_effect#Medications
examples = [
    "Abortion, miscarriage or uterine hemorrhage associated with misoprostol (Cytotec), a labor-inducing drug.",
    "Addiction to many sedatives and analgesics, such as diazepam, morphine, etc.",
    "Birth defects associated with thalidomide",
    "Bleeding of the intestine associated with aspirin therapy",
    "Cardiovascular disease associated with COX-2 inhibitors (i.e. Vioxx)",
    "Deafness and kidney failure associated with gentamicin (an antibiotic)",
    "Death, following sedation, in children using propofol (Diprivan)",
    "Depression or hepatic injury caused by interferon",
    "Diabetes caused by atypical antipsychotic medications (neuroleptic psychiatric drugs)",
]

# HTML passed to the interface as its `article`: what the app does, which
# model and dataset it relies on, and who made it.
footer = """
<hr>
This app automatically extracts drug names and adverse effects from the input text. An adverse effect occurs when a drug harms a patient in any way.

The extraction is done by a <a href=https://huggingface.co/jsylee/scibert_scivocab_uncased-finetuned-ner>SciBERT model</a> fine-tuned on the <a href=https://huggingface.co/datasets/ade_corpus_v2>`ade_corpus_v2`</a> dataset. Fine-tuning code <a href=https://github.com/jsylee/personal-projects/blob/master/Hugging%20Face%20ADR%20Fine-Tuning/SciBERT%20ADR%20Fine-Tuning.ipynb>here</a>.

This was made during the November 2021 Hugging Face Community Event.

By <a href=http://www.columbia.edu/~jsl2239/>Justin S. Lee</a>
"""

# Five-line free-text input, with the start of the first example as its hint.
_input_box = gr.inputs.Textbox(
    lines=5,
    placeholder="Abortion, miscarriage or uterine hemorrhage associated with misoprostol...",
)

# Wire the tagger to the UI: text in, displaCy markup out as HTML.
iface = gr.Interface(
    fn=extract_entities,
    inputs=_input_box,
    outputs="html",
    examples=examples,
    title="NER for Drug Names and Adverse Effects",
    article=footer,
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ spacy == 3.2.0
2
+ transformers == 4.11.3