Spaces:
Sleeping
Sleeping
# Streamlit app that shows tweets together with their named-entity (NER) annotations
import random | |
import streamlit as st | |
from datasets import load_dataset | |
from annotated_text import annotated_text | |
# Hub repository id of the dataset displayed by this app
DATASET_ID = "hs-knowledge/hateval_enriched"

# Fetch the dataset; the splits are indexed by name further down
ds = load_dataset(DATASET_ID)
def display_text(example):
    """Render a tweet with its NER entities highlighted inline.

    The text is cut into alternating plain-text pieces and
    ``(entity text, entity type)`` tuples, which are handed to
    ``annotated_text`` in reading order.

    Args:
        example: Mapping with a ``"text"`` string and an ``"entities"``
            list. Each entity is read for ``"start"``, ``"end"``,
            ``"text"`` and ``"type"``. ``start``/``end`` are used as slice
            bounds into ``example["text"]`` (presumably character offsets
            with an exclusive ``end`` -- TODO confirm against the dataset).

    Returns:
        None. Output is produced through ``annotated_text``.
    """
    text = example["text"]
    # Process entities left to right so the pieces come out in reading order
    entities = sorted(example["entities"], key=lambda ent: ent["start"])

    pieces = []
    cursor = 0  # index of the first character not yet emitted as plain text
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if cursor < start:
            # Plain text between the previous entity and this one
            pieces.append(text[cursor:start])
        pieces.append((entity["text"], entity["type"]))
        # Never move backwards: a nested or overlapping entity must not cause
        # plain text that was already covered to be emitted a second time
        cursor = max(cursor, end)

    # Emit whatever follows the last entity. When there are no entities this
    # is the whole text, which is passed through even if it is empty.
    tail = text[cursor:]
    if tail or not pieces:
        pieces.append(tail)

    annotated_text(*pieces)
# Number of randomly drawn training examples shown per run
NUM_EXAMPLES = 50

# Pick distinct random examples. random.sample draws without replacement, so
# the same tweet is never listed twice, and the sample size is clamped so a
# split smaller than NUM_EXAMPLES does not raise.
num_train = len(ds["train"])
elements = random.sample(range(num_train), k=min(NUM_EXAMPLES, num_train))
ds["train"] = ds["train"].select(elements)

for ex in ds["train"]:
    st.write("=" * 80)
    # NOTE: display_text(ex) renders the same tweet with inline highlights.
    st.write(ex["text"])
    for ent in ex["entities"]:
        entity_name = ent["text"]
        entity_type = ent["type"]
        # NOTE(review): presumably not every entity has a knowledge-graph
        # description -- verify against the dataset. A missing or null level
        # falls back to an empty description instead of aborting the page.
        try:
            entity_description = ent["kg_result"]["detailedDescription"]["articleBody"]
        except (KeyError, TypeError):
            entity_description = None
        annotated_text(
            (entity_name, "entity"),
            (f"({entity_type})", "type"),
            entity_description or "",
        )