Spaces:
Build error
Build error
import streamlit as st | |
import random | |
import base64 | |
import pandas as pd | |
import numpy as np | |
import streamlit_apps_config as config | |
# from colour import Color | |
current_path = config.project_path | |
def get_color(l): | |
if str(l).lower() in config.LABEL_COLORS.keys(): | |
return config.LABEL_COLORS[l.lower()] | |
else: | |
r = lambda: random.randint(0,200) | |
return '#%02X%02X%02X' % (r(), r(), r()) | |
def jsl_display_annotations_not_converted(original_text, fully_annotated_text, labels): | |
"""Function to display NER annotation when ner_converter was not used | |
""" | |
label_color = {} | |
for l in labels: | |
label_color[l] = get_color(l) | |
html_output = "" | |
#html_output = """<div>""" | |
pos = 0 | |
for n in fully_annotated_text['ner']: | |
begin = n[1] | |
end = n[2] | |
entity = n[3] # When ner_converter: n[4]['entity'] | |
word = n[4]['word'] # When ner_converter: n[3] | |
if pos < begin and pos < len(original_text): | |
white_text = original_text[pos:begin] | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text) | |
pos = end+1 | |
if entity in label_color: | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format( | |
label_color[n[3]], | |
word, | |
entity) | |
else: | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(word) | |
if pos < len(original_text): | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:]) | |
html_output += """</div>""" | |
return html_output | |
def jsl_display_annotations(original_text, fully_annotated_text, labels): | |
label_color = {} | |
for l in labels: | |
label_color[l] = get_color(l) | |
html_output = "" | |
#html_output = """<div>""" | |
pos = 0 | |
for n in fully_annotated_text['ner_chunk']: | |
#print (n) | |
begin = n[1] | |
end = n[2] | |
if pos < begin and pos < len(original_text): | |
white_text = original_text[pos:begin] | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text) | |
pos = end+1 | |
if n[4]['entity'] in label_color: | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format( | |
label_color[n[4]['entity']], | |
n[3], | |
n[4]['entity']) | |
else: | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(n[3]) | |
if pos < len(original_text): | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:]) | |
html_output += """</div>""" | |
return html_output | |
def show_html2(original_text, fully_annotated_text, label_set, title_message="Text annotated with identified Named Entities", show_tag=True, converted=True): | |
"""Show annotation as HTML objects | |
David Cecchini: Added the parameter `converted` to control if the annotated text is output of ner_converter or not (use nerTagger output) | |
""" | |
if show_tag is False: | |
st.subheader("Text annotated with matched Entities".format('')) | |
html_content = jsl_display_annotations_without_tag(original_text, fully_annotated_text, label_set) | |
html_content = html_content.replace("\n", "<br>") | |
st.write(config.HTML_WRAPPER.format(html_content), unsafe_allow_html=True) | |
else: | |
#st.subheader("Text annotated with identified Named Entities".format('')) | |
st.subheader(title_message.format('')) | |
if converted: | |
html_content = jsl_display_annotations(original_text, fully_annotated_text, label_set) | |
else: | |
html_content = jsl_display_annotations_not_converted(original_text, fully_annotated_text, label_set) | |
html_content = html_content.replace("\n", "<br>") | |
st.write(config.HTML_WRAPPER.format(html_content), unsafe_allow_html=True) | |
st.write('') | |
def jsl_display_annotations_without_tag(original_text, fully_annotated_text, labels): | |
label_color = {} | |
for l in labels: | |
label_color[l] = get_color(l) | |
html_output = "" | |
#html_output = """<div>""" | |
pos = 0 | |
for n in fully_annotated_text['matched_text']: | |
#print (n) | |
begin = n[1] | |
end = n[2] | |
if pos < begin and pos < len(original_text): | |
white_text = original_text[pos:begin] | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text) | |
pos = end+1 | |
if n[3] in label_color: | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span></span>'.format( | |
label_color[n[3]], | |
n[3]) | |
else: | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(n[3]) | |
if pos < len(original_text): | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:]) | |
html_output += """</div>""" | |
return html_output | |
def jsl_display_spell_correction(original_tokens, corrected_tokens): | |
color = get_color('rand') | |
st.subheader("Text annotated with corrected words".format('')) | |
html_output = '' | |
for original_token, corrected_token in zip(original_tokens, corrected_tokens): | |
original = original_token[3] | |
corrected = corrected_token[3] | |
if original != corrected: | |
html_output += ' <span class="entity-wrapper" style="background-color: {}"><span class="entity-name"><del> {} </del> {} </span></span>'.format(color, original, corrected) | |
else: | |
original = original if original in set([",", "."]) else ' ' + original #quick and dirty handle formatting | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original) | |
html_output = html_output.replace("\n", "<br>") | |
st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True) | |
def jsl_display_entity_resolution(original_text, fully_annotated_text, labels): | |
label_color = {} | |
for l in labels: | |
label_color[l] = get_color(l) | |
html_output = "" | |
#html_output = """<div>""" | |
pos = 0 | |
for i, n in fully_annotated_text.iterrows(): | |
begin = n[1] | |
end = n[2] | |
if pos < begin and pos < len(original_text): | |
white_text = original_text[pos:begin] | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text) | |
pos = end+1 | |
resolution_chunk = n[4] | |
resolution_exp = n[5] | |
if n[3] in label_color: | |
second_color = get_color(resolution_chunk) | |
if resolution_exp.lower() != 'na': | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span><span class="entity-type" style="background-color: {}">{} </span><span class="entity-type" style="background-color: {}">{}</span></span>'.format( | |
label_color[n[3]] + 'B3', #color | |
n[0], #entity - chunk | |
n[3], #entity - label | |
label_color[n[3]] + 'FF', #color '#D2C8C6' | |
resolution_chunk, # res_code | |
label_color[n[3]] + 'CC', # res_color '#DDD2D0' | |
resolution_exp) # res_text | |
else: | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format( | |
label_color[n[3]], | |
n[0], | |
n[3]) | |
if pos < len(original_text): | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:]) | |
html_output += """</div>""" | |
html_output = html_output.replace("\n", "<br>") | |
st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True) | |
def jsl_display_assertion(original_text, fully_annotated_text, labels): | |
label_color = {} | |
for l in labels: | |
label_color[l] = get_color(l) | |
html_output = "" | |
#html_output = """<div>""" | |
pos = 0 | |
for i, n in fully_annotated_text.iterrows(): | |
begin = n[1] | |
end = n[2] | |
if pos < begin and pos < len(original_text): | |
white_text = original_text[pos:begin] | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(white_text) | |
pos = end+1 | |
resolution_chunk = n[4] | |
if n[3] in label_color: | |
if resolution_chunk.lower() != 'na': | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span><span class="entity-type" style="background-color: {}">{} </span></span>'.format( | |
label_color[n[3]] + 'B3', #color | |
n[0], #entity - chunk | |
n[3], #entity - label | |
label_color[n[3]] + 'FF', #color '#D2C8C6' | |
resolution_chunk) | |
else: | |
html_output += '<span class="entity-wrapper" style="background-color: {}"><span class="entity-name">{} </span><span class="entity-type">{}</span></span>'.format( | |
label_color[n[3]], | |
n[0], | |
n[3]) | |
if pos < len(original_text): | |
html_output += '<span class="others" style="background-color: white">{}</span>'.format(original_text[pos:]) | |
html_output += """</div>""" | |
html_output = html_output.replace("\n", "<br>") | |
st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True) | |
def display_example_text(text): | |
return """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap; min-height: 200px; max-height: 500px; line-height: 2.0">{}</div>""".format(text) | |