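"""Streamlit app to highlight NER entities.

Loads the hs-knowledge/hateval_enriched dataset, samples 50 tweets, and
renders each tweet's linked entities (type + knowledge-graph description)
with st-annotated-text.
"""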
import random

import streamlit as st
from datasets import load_dataset
from annotated_text import annotated_text

# Load the enriched HatEval dataset (tweets + linked entities)
ds = load_dataset("hs-knowledge/hateval_enriched")
# Show highlighted NER entities in a tweet
def display_text(example):
    # Use annotated_text to show entities
    text = example["text"]
    # Sort entities by start offset so chunks are built left to right
    entities = sorted(example["entities"], key=lambda x: x["start"])

    # No entities: render the plain text and return early
    if len(entities) == 0:
        annotated_text(text)
        return

    # Chunk text into plain spans and (entity text, entity type) tuples
    chunks = []
    last_index = 0
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if last_index < start:
            # Plain text between the previous entity and this one
            chunks.append((text[last_index:start], None))
        chunks.append((entity["text"], entity["type"]))
        last_index = end
        # description = entity["kg_result"]["detailedDescription"]["articleBody"]

    # Keep any trailing text after the last entity
    if last_index < len(text):
        chunks.append((text[last_index:], None))

    # annotated_text expects bare strings for unannotated spans
    chunks = [(c, t) if t is not None else c for c, t in chunks]
    annotated_text(*chunks)
# Randomly sample 50 training examples to display (without replacement)
elements = random.sample(range(len(ds["train"])), k=50)
ds["train"] = ds["train"].select(elements)
for ex in ds["train"]:
    st.write("=" * 80)
    # display_text(ex)
    st.write(ex["text"])
    # Show each entity with its type and knowledge-graph description
    for ent in ex["entities"]:
        entity_name = ent["text"]
        entity_type = ent["type"]
        entity_description = ent["kg_result"]["detailedDescription"]["articleBody"]
        annotated_text(
            (entity_name, "entity"),
            (f"({entity_type})", "type"),
            entity_description,
        )