# Streamlit app to highlight NER entities
import random
import streamlit as st
from datasets import load_dataset
from annotated_text import annotated_text

# Load data
ds = load_dataset("hs-knowledge/hateval_enriched")
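
# Assumed structure of each example, inferred from the fields accessed below
# (not verified against the hs-knowledge/hateval_enriched dataset card):
# {
#     "text": "the tweet text",
#     "entities": [
#         {
#             "text": "surface form",
#             "type": "entity type",
#             "start": 0,   # character offsets into "text"
#             "end": 5,
#             "kg_result": {"detailedDescription": {"articleBody": "..."}},
#         }
#     ],
# }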

# Show highlighted NER entities in a tweet


def display_text(example):
    # Use annotated_text to show entities
    text = example["text"]

    # Sort entities by start
    entities = sorted(example["entities"], key=lambda x: x["start"])

    # Chunk the text into plain segments and (entity, type) annotations

    if len(entities) == 0:
        # No entities: render the raw text
        annotated_text(text)
        return

    chunks = []
    last_index = 0
    for entity in entities:
        start, end = entity["start"], entity["end"]

        # Plain text between the previous entity (or the start) and this one
        if last_index < start:
            chunks.append((text[last_index:start], None))
        chunks.append((entity["text"], entity["type"]))

        last_index = end

    # Keep any trailing text after the last entity
    if last_index < len(text):
        chunks.append((text[last_index:], None))

    # annotated_text expects bare strings for unannotated chunks
    chunks = [(c, t) if t is not None else c for c, t in chunks]
    annotated_text(*chunks)
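
# Illustrative example (hypothetical data): for text "I love Paris" with one
# entity {"text": "Paris", "type": "LOC", "start": 7, "end": 12},
# display_text renders the chunks "I love " and ("Paris", "LOC").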


# Sample 50 random examples (random.sample avoids duplicate rows)

elements = random.sample(range(len(ds["train"])), k=50)
ds["train"] = ds["train"].select(elements)

for ex in ds["train"]:
    st.write("=" * 80)
    # display_text(ex)
    st.write(ex["text"])

    # For each entity, show its surface form, type, and knowledge-graph description
    for ent in ex["entities"]:
        entity_name = ent["text"]
        entity_type = ent["type"]
        entity_description = ent["kg_result"]["detailedDescription"]["articleBody"]
        annotated_text(
            (entity_name, "entity"), (f"({entity_type})", "type"), entity_description
        )
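
# To run locally (assuming this file is saved as app.py):
#   pip install streamlit datasets st-annotated-text
#   streamlit run app.py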