aemin committed on
Commit
ddbbc0a
1 Parent(s): 437699e

Upload streamlit_ner_output.py

Browse files
Files changed (1) hide show
  1. streamlit_ner_output.py +237 -0
streamlit_ner_output.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import random
3
+ import base64
4
+ import pandas as pd
5
+ import numpy as np
6
+ import streamlit_apps_config as config
7
+ # from colour import Color
8
+ current_path = config.project_path
9
def get_color(l):
    """Return the hex color string used to highlight label ``l``.

    The label is converted with ``str()`` and lower-cased, then looked up in
    ``config.LABEL_COLORS``. The same normalized key is used for both the
    membership test and the lookup, so non-string labels no longer fail on
    ``l.lower()``.

    If the label is not configured, a random ``#RRGGBB`` color is returned
    with each channel drawn from 0..200 inclusive, so repeated calls for an
    unknown label may return different colors.
    """
    key = str(l).lower()
    if key in config.LABEL_COLORS:
        return config.LABEL_COLORS[key]
    # Unknown label: draw the three channels independently.
    red = random.randint(0, 200)
    green = random.randint(0, 200)
    blue = random.randint(0, 200)
    return '#%02X%02X%02X' % (red, green, blue)
15
+
16
+
17
def jsl_display_annotations_not_converted(original_text, fully_annotated_text, labels):
    """Build highlighted HTML for NER output when ner_converter was not used.

    Annotations are read from ``fully_annotated_text['ner']`` and indexed
    positionally: ``[1]`` begin offset, ``[2]`` end offset (inclusive, the
    cursor resumes at ``end + 1``), ``[3]`` entity tag, ``[4]['word']`` the
    token text. With ner_converter the tag/text positions are swapped; see
    ``jsl_display_annotations``.

    Args:
        original_text: The raw text the offsets refer to.
        fully_annotated_text: Mapping holding the ``'ner'`` annotation list.
        labels: Entity tags to highlight; each gets a color via ``get_color``.
            Tokens with any other tag are rendered as plain text.

    Returns:
        The HTML fragment as a string. All text taken from the input is
        HTML-escaped so characters such as ``<`` or ``&`` cannot break or
        inject markup when the fragment is rendered as raw HTML.
    """
    from html import escape  # function-scope import keeps this block self-contained

    plain_span = '<span class="others" style="background-color: white">{}</span>'
    entity_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span></span>'
    )

    label_color = {label: get_color(label) for label in labels}
    text_length = len(original_text)
    pieces = []
    pos = 0  # index of the first character not yet emitted
    for n in fully_annotated_text['ner']:
        begin = n[1]
        end = n[2]
        entity = n[3]  # When ner_converter: n[4]['entity']
        word = escape(str(n[4]['word']))  # When ner_converter: n[3]

        # Emit the unannotated gap between the previous token and this one.
        if pos < begin and pos < text_length:
            pieces.append(plain_span.format(escape(original_text[pos:begin])))
        pos = end + 1

        if entity in label_color:
            pieces.append(entity_span.format(label_color[entity], word, escape(str(entity))))
        else:
            pieces.append(plain_span.format(word))

    # Whatever follows the last annotation.
    if pos < text_length:
        pieces.append(plain_span.format(escape(original_text[pos:])))

    # NOTE(review): the matching opening <div> is not produced here; kept
    # because the wrapper this is formatted into is not visible from this file.
    pieces.append("</div>")
    return "".join(pieces)
49
+
50
+
51
def jsl_display_annotations(original_text, fully_annotated_text, labels):
    """Build highlighted HTML for NER chunks (ner_converter output).

    Annotations are read from ``fully_annotated_text['ner_chunk']`` and
    indexed positionally: ``[1]`` begin offset, ``[2]`` end offset
    (inclusive, the cursor resumes at ``end + 1``), ``[3]`` chunk text,
    ``[4]['entity']`` the entity label.

    Args:
        original_text: The raw text the offsets refer to.
        fully_annotated_text: Mapping holding the ``'ner_chunk'`` list.
        labels: Entity labels to highlight; each gets a color via
            ``get_color``. Chunks with other labels are shown as plain text.

    Returns:
        The HTML fragment as a string. All text taken from the input is
        HTML-escaped so it cannot break or inject markup when rendered as
        raw HTML.
    """
    from html import escape  # function-scope import keeps this block self-contained

    plain_span = '<span class="others" style="background-color: white">{}</span>'
    entity_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span></span>'
    )

    label_color = {label: get_color(label) for label in labels}
    text_length = len(original_text)
    pieces = []
    pos = 0  # index of the first character not yet emitted
    for n in fully_annotated_text['ner_chunk']:
        begin = n[1]
        end = n[2]
        chunk = escape(str(n[3]))
        entity = n[4]['entity']

        # Emit the unannotated gap between the previous chunk and this one.
        if pos < begin and pos < text_length:
            pieces.append(plain_span.format(escape(original_text[pos:begin])))
        pos = end + 1

        if entity in label_color:
            pieces.append(entity_span.format(label_color[entity], chunk, escape(str(entity))))
        else:
            pieces.append(plain_span.format(chunk))

    # Whatever follows the last chunk.
    if pos < text_length:
        pieces.append(plain_span.format(escape(original_text[pos:])))

    # NOTE(review): the matching opening <div> is not produced here; kept
    # because the wrapper this is formatted into is not visible from this file.
    pieces.append("</div>")
    return "".join(pieces)
80
+
81
+
82
def show_html2(original_text, fully_annotated_text, label_set, title_message="Text annotated with identified Named Entities", show_tag=True, converted=True):
    """Render annotated text in the Streamlit page.

    A sub-header is written first, then the HTML produced by one of the
    ``jsl_display_annotations*`` builders (newlines turned into ``<br>``)
    wrapped in ``config.HTML_WRAPPER``, and finally an empty ``st.write``.

    * ``show_tag is False``: fixed heading, matched-text rendering without
      entity tags.
    * otherwise: ``title_message`` as heading; ``converted`` selects between
      ner_converter output (True) and raw nerTagger output (False).
      (``converted`` parameter added by David Cecchini.)
    """
    if show_tag is False:
        heading = "Text annotated with matched Entities"
        build_html = jsl_display_annotations_without_tag
    else:
        heading = title_message.format('')
        if converted:
            build_html = jsl_display_annotations
        else:
            build_html = jsl_display_annotations_not_converted

    # Heading goes out before the markup is built, as in both original branches.
    st.subheader(heading)
    markup = build_html(original_text, fully_annotated_text, label_set)
    markup = markup.replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(markup), unsafe_allow_html=True)

    st.write('')
104
+
105
def jsl_display_annotations_without_tag(original_text, fully_annotated_text, labels):
    """Build highlighted HTML for matched text, without an entity-type badge.

    Annotations are read from ``fully_annotated_text['matched_text']`` and
    indexed positionally: ``[1]`` begin offset, ``[2]`` end offset
    (inclusive, the cursor resumes at ``end + 1``), ``[3]`` the matched text.
    The matched text itself is the key looked up in ``labels``: a match is
    highlighted only when its text is one of the given labels.

    Args:
        original_text: The raw text the offsets refer to.
        fully_annotated_text: Mapping holding the ``'matched_text'`` list.
        labels: Values to highlight; each gets a color via ``get_color``.

    Returns:
        The HTML fragment as a string. All text taken from the input is
        HTML-escaped so it cannot break or inject markup when rendered as
        raw HTML.
    """
    from html import escape  # function-scope import keeps this block self-contained

    plain_span = '<span class="others" style="background-color: white">{}</span>'
    match_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span></span>'
    )

    label_color = {label: get_color(label) for label in labels}
    text_length = len(original_text)
    pieces = []
    pos = 0  # index of the first character not yet emitted
    for n in fully_annotated_text['matched_text']:
        begin = n[1]
        end = n[2]
        matched = n[3]
        shown = escape(str(matched))

        # Emit the unannotated gap between the previous match and this one.
        if pos < begin and pos < text_length:
            pieces.append(plain_span.format(escape(original_text[pos:begin])))
        pos = end + 1

        if matched in label_color:
            pieces.append(match_span.format(label_color[matched], shown))
        else:
            pieces.append(plain_span.format(shown))

    # Whatever follows the last match.
    if pos < text_length:
        pieces.append(plain_span.format(escape(original_text[pos:])))

    # NOTE(review): the matching opening <div> is not produced here; kept
    # because the wrapper this is formatted into is not visible from this file.
    pieces.append("</div>")
    return "".join(pieces)
133
+
134
def jsl_display_spell_correction(original_tokens, corrected_tokens):
    """Write the token stream to the Streamlit page, highlighting corrections.

    The two sequences are walked in lockstep with ``zip`` (extra items in
    the longer one are ignored) and the text at index ``[3]`` of each item is
    compared. A changed token is shown as the struck-through original
    followed by the correction, on a background color from
    ``get_color('rand')``; an unchanged token is shown as plain text.

    Token text is HTML-escaped before being placed in the markup, because
    the result is rendered with ``unsafe_allow_html=True``.

    Returns:
        None. Output goes to the page via ``st.subheader`` / ``st.write``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    changed_span = (
        ' <span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name"><del> {} </del> {} </span></span>'
    )
    plain_span = '<span class="others" style="background-color: white">{}</span>'
    no_space_before = (",", ".")  # punctuation that attaches to the previous token

    color = get_color('rand')

    st.subheader("Text annotated with corrected words")

    pieces = []
    for original_token, corrected_token in zip(original_tokens, corrected_tokens):
        original = original_token[3]
        corrected = corrected_token[3]
        if original != corrected:
            pieces.append(changed_span.format(color, escape(str(original)), escape(str(corrected))))
        else:
            # Quick formatting rule: separate tokens with a space except
            # before a comma or full stop.
            separator = '' if original in no_space_before else ' '
            pieces.append(plain_span.format(separator + escape(str(original))))

    html_output = "".join(pieces).replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
155
+
156
+
157
def jsl_display_entity_resolution(original_text, fully_annotated_text, labels):
    """Write text with entity-resolution badges to the Streamlit page.

    ``fully_annotated_text`` is iterated row by row and its columns are read
    by position: 0 chunk text, 1 begin offset, 2 end offset (inclusive, the
    cursor resumes at ``end + 1``), 3 entity label, 4 and 5 the two
    resolution fields shown as extra badges (labelled "res_code" and
    "res_text" in the original code). Presumably a pandas DataFrame with
    the columns in exactly that order - verify against the caller.

    Rows are read with ``itertuples`` so positional access does not depend
    on the column labels.

    For an entity whose label is in ``labels``:
      * if column 5, lower-cased, is not ``'na'``, both resolution fields
        are appended as badges;
      * otherwise only chunk and label are shown.
    An entity whose label is not in ``labels`` is shown as plain text, so
    the rendered text stays complete.

    All text taken from the input is HTML-escaped because the result is
    rendered with ``unsafe_allow_html=True``.

    Returns:
        None. Output goes to the page via ``st.write``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    plain_span = '<span class="others" style="background-color: white">{}</span>'
    entity_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span></span>'
    )
    resolved_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span>'
        '<span class="entity-type" style="background-color: {}">{} </span>'
        '<span class="entity-type" style="background-color: {}">{}</span></span>'
    )

    label_color = {label: get_color(label) for label in labels}
    text_length = len(original_text)
    pieces = []
    pos = 0  # index of the first character not yet emitted
    for row in fully_annotated_text.itertuples(index=False, name=None):
        chunk = escape(str(row[0]))
        begin = row[1]
        end = row[2]
        label = row[3]
        resolution_chunk = row[4]
        resolution_exp = row[5]

        # Emit the unannotated gap between the previous entity and this one.
        if pos < begin and pos < text_length:
            pieces.append(plain_span.format(escape(original_text[pos:begin])))
        pos = end + 1

        if label not in label_color:
            pieces.append(plain_span.format(chunk))
            continue

        base_color = label_color[label]
        shown_label = escape(str(label))
        if resolution_exp.lower() != 'na':
            # Two hex digits are appended to the base color for each badge;
            # presumably an alpha channel on a #RRGGBB value - TODO confirm.
            pieces.append(resolved_span.format(
                base_color + 'B3',
                chunk,
                shown_label,
                base_color + 'FF',
                escape(str(resolution_chunk)),
                base_color + 'CC',
                escape(str(resolution_exp)),
            ))
        else:
            pieces.append(entity_span.format(base_color, chunk, shown_label))

    # Whatever follows the last entity.
    if pos < text_length:
        pieces.append(plain_span.format(escape(original_text[pos:])))

    # NOTE(review): the matching opening <div> is not produced here; kept
    # because config.HTML_WRAPPER is not visible from this file.
    pieces.append("</div>")
    html_output = "".join(pieces).replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
198
+
199
def jsl_display_assertion(original_text, fully_annotated_text, labels):
    """Write text with an extra per-entity badge to the Streamlit page.

    ``fully_annotated_text`` is iterated row by row and its columns are read
    by position: 0 chunk text, 1 begin offset, 2 end offset (inclusive, the
    cursor resumes at ``end + 1``), 3 entity label, 4 the value shown as the
    extra badge (presumably the assertion status, going by the function
    name - verify against the caller). Presumably a pandas DataFrame with
    the columns in exactly that order.

    Rows are read with ``itertuples`` so positional access does not depend
    on the column labels.

    For an entity whose label is in ``labels``:
      * if column 4, lower-cased, is not ``'na'``, it is appended as a badge;
      * otherwise only chunk and label are shown.
    An entity whose label is not in ``labels`` is shown as plain text, so
    the rendered text stays complete.

    All text taken from the input is HTML-escaped because the result is
    rendered with ``unsafe_allow_html=True``.

    Returns:
        None. Output goes to the page via ``st.write``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    plain_span = '<span class="others" style="background-color: white">{}</span>'
    entity_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span></span>'
    )
    badge_span = (
        '<span class="entity-wrapper" style="background-color: {}">'
        '<span class="entity-name">{} </span>'
        '<span class="entity-type">{}</span>'
        '<span class="entity-type" style="background-color: {}">{} </span></span>'
    )

    label_color = {label: get_color(label) for label in labels}
    text_length = len(original_text)
    pieces = []
    pos = 0  # index of the first character not yet emitted
    for row in fully_annotated_text.itertuples(index=False, name=None):
        chunk = escape(str(row[0]))
        begin = row[1]
        end = row[2]
        label = row[3]
        status = row[4]

        # Emit the unannotated gap between the previous entity and this one.
        if pos < begin and pos < text_length:
            pieces.append(plain_span.format(escape(original_text[pos:begin])))
        pos = end + 1

        if label not in label_color:
            pieces.append(plain_span.format(chunk))
            continue

        base_color = label_color[label]
        shown_label = escape(str(label))
        if status.lower() != 'na':
            # Two hex digits are appended to the base color for the badge;
            # presumably an alpha channel on a #RRGGBB value - TODO confirm.
            pieces.append(badge_span.format(
                base_color + 'B3',
                chunk,
                shown_label,
                base_color + 'FF',
                escape(str(status)),
            ))
        else:
            pieces.append(entity_span.format(base_color, chunk, shown_label))

    # Whatever follows the last entity.
    if pos < text_length:
        pieces.append(plain_span.format(escape(original_text[pos:])))

    # NOTE(review): the matching opening <div> is not produced here; kept
    # because config.HTML_WRAPPER is not visible from this file.
    pieces.append("</div>")
    html_output = "".join(pieces).replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
235
+
236
def display_example_text(text):
    """Wrap ``text`` in a bordered, scrollable ``<div>`` and return the HTML.

    The text is inserted as-is; whitespace is kept by the ``pre-wrap`` rule
    in the inline style.
    """
    css_rules = (
        "overflow-x: auto",
        "border: 1px solid #e6e9ef",
        "border-radius: 0.25rem",
        "padding: 1rem",
        "margin-bottom: 2.5rem",
        "white-space:pre-wrap",
        "min-height: 200px",
        "max-height: 500px",
        "line-height: 2.0",
    )
    template = '<div style="' + "; ".join(css_rules) + '">{}</div>'
    return template.format(text)