Dagobert42 committed on
Commit
00a6def
1 Parent(s): 125cf0c

add color annotated results, use new models

Browse files
Files changed (3) hide show
  1. app.py +12 -22
  2. helpers.py +31 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,41 +1,31 @@
1
  import torch
2
  import streamlit as st
3
- from transformers import pipeline
4
  from random import choice
 
 
5
 
6
  with open("sentences.pt", 'rb') as f:
7
  sentences = torch.load(f)
8
  sentence = choice(sentences)
9
 
10
- baseline_classifier = pipeline(
11
- model="Dagobert42/mobilebert-uncased-biored-finetuned-ner",
12
- task="ner",
13
- aggregation_strategy="simple"
14
- )
15
- augmented_classifier = pipeline(
16
- model="Dagobert42/mobilebert-uncased-biored-augmented-ner",
17
- task="ner",
18
- aggregation_strategy="simple"
19
- )
20
-
21
  st.title("Semantic Frame Augmentation")
22
- st.caption("Analysing difficult low-resource domains with only a handful of examples")
23
 
24
- st.write("This space uses a googel/mobilebert-uncased model for NER")
25
  augment = st.toggle('Use augmented model for NER', value=False)
26
 
27
- if augment:
28
- st.write("with augmentation:")
29
- tokens = augmented_classifier(sentence)
30
- else:
31
- st.write("without augmentation:")
32
- tokens = baseline_classifier(sentence)
33
-
34
  txt = st.text_area(
35
  "Text to analyze",
36
  sentence,
37
  max_chars=500
38
  )
39
 
 
 
 
 
 
 
 
40
  st.subheader("Entity analysis:")
41
- st.write(tokens)
 
1
  import torch
2
  import streamlit as st
 
3
  from random import choice
4
+ from annotated_text import annotated_text
5
+ from helpers import *
6
 
7
  with open("sentences.pt", 'rb') as f:
8
  sentences = torch.load(f)
9
  sentence = choice(sentences)
10
 
 
 
 
 
 
 
 
 
 
 
 
11
  st.title("Semantic Frame Augmentation")
12
+ st.subheader("Analysing difficult low-resource domains with only a handful of examples")
13
 
14
+ st.write("This space uses a google/mobilebert-uncased model for NER")
15
  augment = st.toggle('Use augmented model for NER', value=False)
16
 
 
 
 
 
 
 
 
17
  txt = st.text_area(
18
  "Text to analyze",
19
  sentence,
20
  max_chars=500
21
  )
22
 
23
# Run the classifier selected by the toggle on the text currently in the box.
if augment:
    st.write("with augmentation:")
    tokens = augmented_classifier(txt)
else:
    st.write("without augmentation:")
    tokens = baseline_classifier(txt)

st.subheader("Entity analysis:")
# The predictions were computed on `txt`, so their start/end offsets index
# into `txt`. Annotating the randomly drawn default `sentence` instead would
# slice the wrong string as soon as the user edits the text area.
annotated_text(annotate_sentence(txt, tokens))
helpers.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# NER pipeline backed by the plain fine-tuned checkpoint.
baseline_classifier = pipeline(
    task="ner",
    model="Dagobert42/biored-finetuned",
    aggregation_strategy="simple",
)

# NER pipeline backed by the checkpoint trained with augmentation.
augmented_classifier = pipeline(
    task="ner",
    model="Dagobert42/biored-augmented",
    aggregation_strategy="simple",
)
11
+
12
def annotate_sentence(sentence, predictions):
    """Split *sentence* into plain-text and entity-annotated segments.

    Args:
        sentence: The text the predictions refer to; it is sliced with the
            ``start``/``end`` offsets of each prediction.
        predictions: Iterable of dicts providing the keys ``'start'``,
            ``'end'``, ``'word'`` and ``'entity_group'``. They are consumed
            in the given order (presumably sorted by position, as returned
            by the NER pipelines in this module -- verify against caller).

    Returns:
        A list, in reading order, of plain ``str`` segments and
        ``(word, entity_group, color)`` tuples. Empty plain segments are
        omitted. With no predictions the result is ``[sentence]`` (or ``[]``
        for an empty sentence).
    """
    colors = {
        'null': '#bfbfbf',  # Pastel gray
        'GeneOrGeneProduct': '#aad4aa',  # Pastel green
        'DiseaseOrPhenotypicFeature': '#f8b400',  # Pastel orange
        'ChemicalEntity': '#a4c2f4',  # Pastel blue
        'OrganismTaxon': '#ffb6c1',  # Pastel pink
        'SequenceVariant': '#e2b0ff',  # Pastel purple
        'CellLine': '#ffcc99',  # Pastel peach
    }
    output = []
    cursor = 0  # index of the first character not yet emitted
    for p in predictions:
        # Plain text between the previous entity and this one.
        gap = sentence[cursor:p['start']]
        if gap:
            output.append(gap)
        label = p['entity_group']
        # Labels missing from the table fall back to gray instead of
        # raising KeyError.
        output.append((p['word'], label, colors.get(label, colors['null'])))
        cursor = p['end']
    # Use the cursor rather than the loop variable so that an empty
    # `predictions` still yields the untouched sentence.
    tail = sentence[cursor:]
    if tail:
        output.append(tail)
    return output
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  streamlit
 
2
  transformers
3
  torch
 
1
  streamlit
2
+ st-annotated-text
3
  transformers
4
  torch