"""Streamlit app that highlights NER entities in a random sample of tweets."""
import random

import streamlit as st
from annotated_text import annotated_text
from datasets import load_dataset

# Number of random training examples rendered on the page.
SAMPLE_SIZE = 50

# Load data
ds = load_dataset("hs-knowledge/hateval_enriched")


def _build_chunks(text, entities):
    """Split *text* into plain strings and ``(entity_text, entity_type)`` tuples.

    Args:
        text: The full tweet text.
        entities: Iterable of mappings with ``start``, ``end``, ``text`` and
            ``type`` keys; ``start``/``end`` are used as slice offsets
            into *text*.

    Returns:
        A list suitable for unpacking into ``annotated_text``: plain ``str``
        items for un-annotated spans and ``(text, type)`` tuples for entities.
        An entity whose type is ``None`` is emitted as a plain string.
    """
    chunks = []
    last_index = 0
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"], entity["end"]
        if last_index < start:
            chunks.append(text[last_index:start])
        if entity["type"] is not None:
            chunks.append((entity["text"], entity["type"]))
        else:
            chunks.append(entity["text"])
        # Never move backwards: overlapping/nested entities must not cause
        # already-emitted text to be repeated.
        last_index = max(last_index, end)

    # Emit whatever follows the last entity; without this the tail of the
    # tweet would be silently dropped.
    if last_index < len(text):
        chunks.append(text[last_index:])
    return chunks


def _entity_description(entity):
    """Return the knowledge-graph article body for *entity*, or ``""``.

    NOTE(review): assumes some entities may lack ``kg_result`` /
    ``detailedDescription`` (missing key or ``None``) -- confirm against the
    dataset schema. Falling back to an empty string keeps the page rendering
    instead of crashing on the first such entity.
    """
    try:
        description = entity["kg_result"]["detailedDescription"]["articleBody"]
    except (KeyError, TypeError):
        return ""
    return description or ""


# Show highlighted ner entities in a tweet
def display_text(example):
    """Render one example's text with its entities highlighted inline.

    Args:
        example: Mapping with a ``text`` string and an ``entities`` list (see
            ``_build_chunks`` for the expected entity keys).
    """
    text = example["text"]
    entities = example["entities"]

    if not entities:
        annotated_text(text)
        return

    annotated_text(*_build_chunks(text, entities))


# Draw a random sample of training examples *without* replacement so the same
# tweet is never shown twice; cap the size for datasets smaller than the sample.
train_size = len(ds["train"])
elements = random.sample(range(train_size), k=min(SAMPLE_SIZE, train_size))
ds["train"] = ds["train"].select(elements)

for ex in ds["train"]:
    st.write("=" * 80)
    # Inline highlighting is available via display_text(ex); the page
    # currently shows the raw text followed by one line per entity.
    st.write(ex["text"])
    for ent in ex["entities"]:
        entity_name = ent["text"]
        entity_type = ent["type"]
        entity_description = _entity_description(ent)
        annotated_text(
            (entity_name, "entity"),
            (f"({entity_type})", "type"),
            entity_description,
        )