import streamlit as st
import random
import base64
import pandas as pd
import numpy as np
import streamlit_apps_config as config
# from colour import Color

current_path = config.project_path

# NOTE(review): the string templates in this module are reproduced exactly as
# they appear in this copy of the file.  Several of them receive more values
# than they have "{}" placeholders (str.format silently ignores the extras),
# and the output is rendered with unsafe_allow_html=True, so the templates
# look like they lost their HTML markup at some point -- confirm against the
# upstream copy and restore before relying on the rendered output.

# Template used for text that carries no annotation.
_PLAIN_TEMPLATE = '{}'

# Appended once at the very end of every rendered annotation string.
_OUTPUT_SUFFIX = "\n"

# Replacement applied to "\n" right before the HTML is written out.
# NOTE(review): in this copy the replacement is itself a bare line break, so
# the substitution is a no-op; presumably it was an HTML line-break tag.
_LINE_BREAK = "\n"

# Tokens that the spell-correction view prints without a leading space.
_NO_LEADING_SPACE = frozenset({",", "."})


def get_color(l):
    """Return the colour string for label *l*.

    The lower-cased string form of the label is looked up in
    ``config.LABEL_COLORS``.  Labels that are not configured get a random
    ``#RRGGBB`` colour whose three channels are each drawn from 0..200.
    """
    # Use the same key for the membership test and the lookup so that
    # non-string labels do not fail on a missing ``.lower()`` method.
    key = str(l).lower()
    if key in config.LABEL_COLORS:
        return config.LABEL_COLORS[key]
    return '#%02X%02X%02X' % tuple(random.randint(0, 200) for _ in range(3))


def _label_colors(labels):
    """Map every label in *labels* to the colour returned by get_color."""
    return {label: get_color(label) for label in labels}


def _render_spans(original_text, annotations, render_chunk):
    """Interleave un-annotated text with rendered annotation chunks.

    Args:
        original_text: the text the annotations refer to.
        annotations: iterable of indexable records; item 1 is the begin
            offset and item 2 the end offset of the chunk.  The cursor
            resumes at ``end + 1`` after each chunk.
        render_chunk: callable taking one record and returning the string
            to emit for that chunk.

    Returns:
        The concatenated output followed by ``_OUTPUT_SUFFIX``.

    NOTE(review): text is interpolated without escaping and callers render
    the result with unsafe_allow_html=True.
    """
    parts = []
    pos = 0
    text_len = len(original_text)
    for record in annotations:
        begin, end = record[1], record[2]
        # Emit the gap between the previous chunk and this one.
        if pos < begin and pos < text_len:
            parts.append(_PLAIN_TEMPLATE.format(original_text[pos:begin]))
        pos = end + 1
        parts.append(render_chunk(record))
    # Emit whatever follows the last chunk.
    if pos < text_len:
        parts.append(_PLAIN_TEMPLATE.format(original_text[pos:]))
    parts.append(_OUTPUT_SUFFIX)
    return "".join(parts)


def jsl_display_annotations_not_converted(original_text, fully_annotated_text, labels):
    """Render NER annotations when ner_converter was not used.

    Reads ``fully_annotated_text['ner']``; for each record item 3 is the
    entity label and ``item 4['word']`` is the token text.
    """
    label_color = _label_colors(labels)

    def render_chunk(n):
        entity = n[3]        # when ner_converter is used: n[4]['entity']
        word = n[4]['word']  # when ner_converter is used: n[3]
        if entity in label_color:
            # NOTE(review): three values, two placeholders -- `entity` is
            # never rendered by this template as it stands.
            return '{} {}'.format(label_color[entity], word, entity)
        return '{}'.format(word)

    return _render_spans(original_text, fully_annotated_text['ner'], render_chunk)


def jsl_display_annotations(original_text, fully_annotated_text, labels):
    """Render NER chunks produced with ner_converter.

    Reads ``fully_annotated_text['ner_chunk']``; for each record item 3 is
    the chunk text and ``item 4['entity']`` is the entity label.
    """
    label_color = _label_colors(labels)

    def render_chunk(n):
        entity = n[4]['entity']
        if entity in label_color:
            # NOTE(review): three values, two placeholders.
            return '{} {}'.format(label_color[entity], n[3], entity)
        return '{}'.format(n[3])

    return _render_spans(original_text, fully_annotated_text['ner_chunk'], render_chunk)


def show_html2(original_text, fully_annotated_text, label_set,
               title_message="Text annotated with identified Named Entities",
               show_tag=True, converted=True):
    """Show annotation as HTML objects

    David Cecchini: Added the parameter `converted` to control if the
    annotated text is output of ner_converter or not (use nerTagger output)

    When ``show_tag`` is exactly ``False`` the matched-text view is used and
    a fixed sub-header is shown; otherwise ``title_message`` is the
    sub-header and ``converted`` selects between the ner_converter renderer
    and the raw-tagger renderer.
    """
    if show_tag is False:
        st.subheader("Text annotated with matched Entities")
        html_content = jsl_display_annotations_without_tag(
            original_text, fully_annotated_text, label_set)
    else:
        # .format('') is kept here because a caller-supplied title may
        # contain a "{}" placeholder.
        st.subheader(title_message.format(''))
        if converted:
            html_content = jsl_display_annotations(
                original_text, fully_annotated_text, label_set)
        else:
            html_content = jsl_display_annotations_not_converted(
                original_text, fully_annotated_text, label_set)

    html_content = html_content.replace("\n", _LINE_BREAK)
    st.write(config.HTML_WRAPPER.format(html_content), unsafe_allow_html=True)
    # NOTE(review): indentation was lost in this copy of the file; the
    # trailing empty write is assumed to apply to both branches -- confirm.
    st.write('')


def jsl_display_annotations_without_tag(original_text, fully_annotated_text, labels):
    """Render matched text without an entity tag.

    Reads ``fully_annotated_text['matched_text']``; item 3 of each record is
    used both as the colour key and as the displayed text.
    """
    label_color = _label_colors(labels)

    def render_chunk(n):
        if n[3] in label_color:
            # NOTE(review): two values, one placeholder -- only the colour
            # is rendered by this template as it stands.
            return '{} '.format(label_color[n[3]], n[3])
        return '{}'.format(n[3])

    return _render_spans(original_text, fully_annotated_text['matched_text'], render_chunk)


def jsl_display_spell_correction(original_tokens, corrected_tokens):
    """Write the spell-corrected text, marking tokens that were changed.

    Tokens are paired positionally; item 3 of each token record is compared.
    Unchanged tokens are emitted as-is, preceded by a space unless the token
    is "," or ".".
    """
    color = get_color('rand')
    st.subheader("Text annotated with corrected words")

    parts = []
    for original_token, corrected_token in zip(original_tokens, corrected_tokens):
        original = original_token[3]
        corrected = corrected_token[3]
        if original != corrected:
            # NOTE(review): three values, two placeholders.
            parts.append(' {} {} '.format(color, original, corrected))
        else:
            # Quick-and-dirty spacing: no space before "," and ".".
            if original not in _NO_LEADING_SPACE:
                original = ' ' + original
            parts.append('{}'.format(original))

    html_output = "".join(parts).replace("\n", _LINE_BREAK)
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)


def jsl_display_entity_resolution(original_text, fully_annotated_text, labels):
    """Write entity-resolution results.

    ``fully_annotated_text`` is iterated row by row and read by column
    position: 0 chunk text, 1 begin, 2 end, 3 entity label, 4 resolution
    code, 5 resolution text.  A resolution text of "na" (any case) means
    the chunk is rendered without resolution details.
    """
    label_color = _label_colors(labels)

    def render_chunk(n):
        chunk, label = n[0], n[3]
        resolution_chunk, resolution_exp = n[4], n[5]
        if label not in label_color:
            # Previously such chunks were dropped from the output entirely;
            # emit them unstyled like the other renderers do.
            return _PLAIN_TEMPLATE.format(chunk)
        base_color = label_color[label]
        if resolution_exp.lower() != 'na':
            # The two-character suffixes are appended to the colour string
            # (presumably an alpha channel -- TODO confirm).
            # NOTE(review): seven values, four placeholders.
            return '{} {}{} {}'.format(
                base_color + 'B3',   # color
                chunk,               # entity - chunk
                label,               # entity - label
                base_color + 'FF',   # color
                resolution_chunk,    # res_code
                base_color + 'CC',   # res_color
                resolution_exp)      # res_text
        # NOTE(review): three values, two placeholders.
        return '{} {}'.format(base_color, chunk, label)

    # Plain tuples give true positional access regardless of column labels.
    rows = fully_annotated_text.itertuples(index=False, name=None)
    html_output = _render_spans(original_text, rows, render_chunk)
    html_output = html_output.replace("\n", _LINE_BREAK)
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)


def jsl_display_assertion(original_text, fully_annotated_text, labels):
    """Write assertion-status results.

    ``fully_annotated_text`` is iterated row by row and read by column
    position: 0 chunk text, 1 begin, 2 end, 3 entity label, 4 assertion
    value.  A value of "na" (any case) means the chunk is rendered without
    the assertion.
    """
    label_color = _label_colors(labels)

    def render_chunk(n):
        chunk, label, resolution_chunk = n[0], n[3], n[4]
        if label not in label_color:
            # Previously such chunks were dropped from the output entirely;
            # emit them unstyled like the other renderers do.
            return _PLAIN_TEMPLATE.format(chunk)
        base_color = label_color[label]
        if resolution_chunk.lower() != 'na':
            # NOTE(review): five values, three placeholders.
            return '{} {}{} '.format(
                base_color + 'B3',   # color
                chunk,               # entity - chunk
                label,               # entity - label
                base_color + 'FF',   # color
                resolution_chunk)
        # NOTE(review): three values, two placeholders.
        return '{} {}'.format(base_color, chunk, label)

    # Plain tuples give true positional access regardless of column labels.
    rows = fully_annotated_text.itertuples(index=False, name=None)
    html_output = _render_spans(original_text, rows, render_chunk)
    html_output = html_output.replace("\n", _LINE_BREAK)
    st.write(config.HTML_WRAPPER.format(html_output), unsafe_allow_html=True)


def display_example_text(text):
    """Return *text* wrapped in the example-text template.

    NOTE(review): in this copy the template is just a line break before and
    after the text; it presumably carried HTML markup -- confirm.
    """
    return "\n{}\n".format(text)