andrewgleave committed on
Commit
24d9d43
1 Parent(s): 3469da9
Files changed (2) hide show
  1. app.py +26 -8
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,23 +1,33 @@
1
  import json
 
2
 
3
  import gradio as gr
 
4
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
5
 
6
  tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
7
  model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
8
 
9
- EXAMPLE_TEXTS = []
 
10
  with open("examples.json", "r") as f:
11
  example_json = json.load(f)
12
- EXAMPLE_TEXTS = [x["text"] for x in example_json]
13
-
14
 
15
  pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
16
 
17
 
 
 
 
 
 
 
 
 
18
  def ner(text):
19
  raw = pipe(text)
20
- result = {
21
  "text": text,
22
  "entities": [
23
  {
@@ -30,14 +40,22 @@ def ner(text):
30
  for x in raw
31
  ],
32
  }
33
- return result, {}
 
 
 
34
 
35
 
36
  interface = gr.Interface(
37
  ner,
38
- inputs=gr.Textbox(label="Input", value=""),
39
- outputs=[gr.HighlightedText(combine_adjacent=True), "json"],
40
- examples=EXAMPLE_TEXTS,
 
 
 
 
 
41
  )
42
 
43
  interface.launch()
 
1
  import json
2
+ from collections import defaultdict
3
 
4
  import gradio as gr
5
+ import pandas as pd
6
  from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
9
  model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
10
 
11
+
12
+ EXAMPLE_MAP = {}
13
  with open("examples.json", "r") as f:
14
  example_json = json.load(f)
15
+ EXAMPLE_MAP = {x["text"]: x["label"] for x in example_json}
 
16
 
17
  pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
18
 
19
 
20
def group_by_entity(raw):
    """Count how many entries in *raw* fall under each entity group.

    Args:
        raw: Iterable of mappings, each with an ``"entity_group"`` key
            (the items produced by ``pipe(text)`` in this file).

    Returns:
        A plain ``dict`` mapping each entity group to its number of
        occurrences, plus a ``"total"`` key holding the sum of all counts.
        For empty input the result is ``{"total": 0}``.

    Raises:
        KeyError: If an item has no ``"entity_group"`` key.
    """
    # Count into a plain dict rather than a defaultdict: a defaultdict handed
    # back to callers would silently insert a zero entry on any missing-key
    # lookup, altering what is later serialised or tabulated.
    counts = {}
    for ent in raw:
        group = ent["entity_group"]
        counts[group] = counts.get(group, 0) + 1
    # Computed before the "total" key exists, so it is the sum of the
    # per-group counts only.
    counts["total"] = sum(counts.values())
    return counts
26
+
27
+
28
  def ner(text):
29
  raw = pipe(text)
30
+ ner_content = {
31
  "text": text,
32
  "entities": [
33
  {
 
40
  for x in raw
41
  ],
42
  }
43
+ grouped = group_by_entity(raw)
44
+ df = pd.DataFrame({"Entity": grouped.keys(), "Count": grouped.values()})
45
+ label = EXAMPLE_MAP.get(text, None)
46
+ return (ner_content, grouped, label, df.hist())
47
 
48
 
49
  interface = gr.Interface(
50
  ner,
51
+ inputs=gr.Textbox(label="Note text", value=""),
52
+ outputs=[
53
+ gr.HighlightedText(label="NER", combine_adjacent=True),
54
+ gr.JSON(label="Entity Counts"),
55
+ gr.Label(label="Rating"),
56
+ "plot",
57
+ ],
58
+ examples=list(EXAMPLE_MAP.keys()),
59
  )
60
 
61
  interface.launch()
requirements.txt CHANGED
@@ -60,6 +60,7 @@ sniffio==1.3.0
60
  starlette==0.20.4
61
  tokenizers==0.12.1
62
  tomli==2.0.1
 
63
  tqdm==4.64.1
64
  transformers==4.22.2
65
  typing_extensions==4.4.0
 
60
  starlette==0.20.4
61
  tokenizers==0.12.1
62
  tomli==2.0.1
63
+ torch==1.12.1
64
  tqdm==4.64.1
65
  transformers==4.22.2
66
  typing_extensions==4.4.0