File size: 10,365 Bytes
ddbbc0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import streamlit as st
import random
import base64
import pandas as pd
import numpy as np
import streamlit_apps_config as config
# from colour import Color
current_path = config.project_path
def get_color(l):
    if str(l).lower() in config.LABEL_COLORS.keys():
        return config.LABEL_COLORS[l.lower()]
    else:
        r = lambda: random.randint(0,200)
        return '#%02X%02X%02X' % (r(), r(), r())


def jsl_display_annotations_not_converted(original_text, fully_annotated_text, labels):
    """Function to display NER annotation when ner_converter was not used
    """
    label_color = {}
    for l in labels:
        label_color[l] = get_color(l)
    html_output = ""
    #html_output = """<div>"""
    pos = 0
    for n in fully_annotated_text['ner']:
        begin = n[1]
        end = n[2]
        entity = n[3] # When ner_converter: n[4]['entity']
        word = n[4]['word'] # When ner_converter: n[3]
        if pos < begin and pos < len(original_text):
            white_text = original_text[pos:begin]
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text)
        pos = end+1

        if entity in label_color:
            html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format(
                label_color[n[3]],
                word,
                entity)
        else:
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(word)

    if pos < len(original_text):
        html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:])

    html_output += """</div>"""
    return html_output


def jsl_display_annotations(original_text, fully_annotated_text, labels):
    label_color = {}
    for l in labels:
        label_color[l] = get_color(l)
    html_output = ""
    #html_output = """<div>"""
    pos = 0
    for n in fully_annotated_text['ner_chunk']:
        #print (n)
        begin = n[1]
        end = n[2]
        if pos < begin and pos < len(original_text):
            white_text = original_text[pos:begin]
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text)
        pos = end+1

        if n[4]['entity'] in label_color:
            html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format(
                label_color[n[4]['entity']],
                n[3],
                n[4]['entity'])
        else:
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(n[3])

    if pos < len(original_text):
        html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:])

    html_output += """</div>"""
    return html_output


def show_html2(original_text, fully_annotated_text, label_set, title_message="Text annotated with identified Named Entities", show_tag=True, converted=True):
    """Show annotation as HTML objects

    David Cecchini: Added the parameter `converted` to control if the annotated text is output of ner_converter or not (use nerTagger output)
    """

    if show_tag is False:
        st.subheader("Text annotated with matched Entities".format(''))
        html_content = jsl_display_annotations_without_tag(original_text, fully_annotated_text, label_set)
        html_content = html_content.replace("\n", "<br>")
        st.write(config.HTML_WRAPPER.format(html_content), unsafe_allow_html=True)
    else:
        #st.subheader("Text annotated with identified Named Entities".format(''))
        st.subheader(title_message.format(''))
        if converted:
            html_content = jsl_display_annotations(original_text, fully_annotated_text, label_set)
        else:
            html_content = jsl_display_annotations_not_converted(original_text, fully_annotated_text, label_set)
        html_content = html_content.replace("\n", "<br>")
        st.write(config.HTML_WRAPPER.format(html_content), unsafe_allow_html=True)

    st.write('')

def jsl_display_annotations_without_tag(original_text, fully_annotated_text, labels):
    label_color = {}
    for l in labels:
        label_color[l] = get_color(l)
    html_output = ""
    #html_output = """<div>"""
    pos = 0
    for n in fully_annotated_text['matched_text']:
        #print (n)
        begin = n[1]
        end = n[2]
        if pos < begin and pos < len(original_text):
            white_text = original_text[pos:begin]
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text)
        pos = end+1

        if n[3] in label_color:
            html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span></span>'.format(
                label_color[n[3]],
                n[3])
        else:
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(n[3])

    if pos < len(original_text):
        html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:])

    html_output += """</div>"""
    return html_output

def jsl_display_spell_correction(original_tokens, corrected_tokens):
  
  color = get_color('rand')

  st.subheader("Text annotated with corrected words".format(''))

  html_output = ''
  for original_token, corrected_token in zip(original_tokens, corrected_tokens):
    original = original_token[3]
    corrected = corrected_token[3]
    if original != corrected:   
      html_output += ' <span class="entity-wrapper" style="background-color: {}"><span class="entity-name"><del> {} </del> {} </span></span>'.format(color, original, corrected)
    
    
    else:
      original = original if original in set([",", "."]) else ' ' + original #quick and dirty handle formatting
      html_output += '<span class="others" style="background-color: white">{}</span>'.format(original)


  html_output = html_output.replace("\n", "<br>")
  st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
    
    
def jsl_display_entity_resolution(original_text, fully_annotated_text, labels):
    label_color = {}
    for l in labels:
        label_color[l] = get_color(l)
    html_output = ""
    #html_output = """<div>"""
    pos = 0
    for i, n in fully_annotated_text.iterrows():
        begin = n[1]
        end = n[2]
        if pos < begin and pos < len(original_text):
            white_text = original_text[pos:begin]
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text)
        pos = end+1
        
        resolution_chunk = n[4]
        resolution_exp = n[5]
        if n[3] in label_color:
            second_color = get_color(resolution_chunk)
            if resolution_exp.lower() != 'na':
                html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span><span class="entity-type" style="background-color: {}">{} </span><span class="entity-type" style="background-color: {}">{}</span></span>'.format(
                    label_color[n[3]] + 'B3', #color
                    n[0], #entity - chunk
                    n[3], #entity - label
                    label_color[n[3]] + 'FF', #color '#D2C8C6' 
                    resolution_chunk, # res_code
                    label_color[n[3]] + 'CC', # res_color '#DDD2D0'
                    resolution_exp) # res_text
                
            else:
                html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format(
                        label_color[n[3]],
                        n[0],
                        n[3])
        
    if pos < len(original_text):
        html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:])

    html_output += """</div>"""
    html_output = html_output.replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
    
def jsl_display_assertion(original_text, fully_annotated_text, labels):
    label_color = {}
    for l in labels:
        label_color[l] = get_color(l)
    html_output = ""
    #html_output = """<div>"""
    pos = 0
    for i, n in fully_annotated_text.iterrows():
        begin = n[1]
        end = n[2]
        if pos < begin and pos < len(original_text):
            white_text = original_text[pos:begin]
            html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text)
        pos = end+1
        
        resolution_chunk = n[4]
        if n[3] in label_color:
            if resolution_chunk.lower() != 'na':
                html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span><span class="entity-type" style="background-color: {}">{} </span></span>'.format(
                    label_color[n[3]] + 'B3', #color
                    n[0], #entity - chunk
                    n[3], #entity - label
                    label_color[n[3]] + 'FF', #color '#D2C8C6' 
                    resolution_chunk)
            else:
                html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format(
                        label_color[n[3]],
                        n[0],
                        n[3])
        
    if pos < len(original_text):
        html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:])

    html_output += """</div>"""
    html_output = html_output.replace("\n", "<br>")
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)
    
def display_example_text(text):
    return """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap; min-height: 200px; max-height: 500px; line-height: 2.0">{}</div>""".format(text)