Spaces:
Runtime error
Runtime error
import gradio as gr | |
import pandas as pd | |
from typing import Any | |
import datasets | |
from tqdm import tqdm | |
from huggingface_hub import login | |
import os | |
# Authenticate against the Hugging Face Hub only when a token is configured.
# Calling login() with None falls back to an interactive prompt, which cannot
# be answered in a non-interactive deployment.
hf_token = os.environ.get("HF_Token")
if hf_token:
    login(hf_token)

# Stream the test split instead of downloading it up front.
test = datasets.load_dataset(
    "minskiter/weibo",
    split=datasets.Split.TEST,
    streaming=True,
)
# Converts integer label ids to their string tag names.
# NOTE(review): assumes "labels" is a sequence of ClassLabel and that the
# streaming dataset exposes its features -- confirm against the dataset card.
int2str = test.features["labels"].feature.int2str

page_size = 10
pages = []

# Read the stream exactly once and cut it into DataFrames of `page_size` rows.
# (Re-opening the stream with skip(i * page_size) for every page re-reads all
# earlier rows each time.)
rows = []
with tqdm(desc="load dataset") as bar:
    for example in test:
        rows.append(example)
        if len(rows) == page_size:
            pages.append(pd.DataFrame(rows))
            rows = []
            bar.update(1)  # update() takes an increment: one page done
    if rows:
        # Trailing, partially filled page.
        pages.append(pd.DataFrame(rows))
        bar.update(1)

# Keep at least one (possibly empty) page so that pages[0] is always valid.
if not pages:
    pages.append(pd.DataFrame())

# Page currently displayed in the UI; replaced by show().
cur = pages[0]
def show(page: float) -> pd.DataFrame:
    """Select the page to display and return it.

    Updates the module-level ``cur`` so that later row selections are
    resolved against the page that is actually shown.

    Args:
        page: Page number coming from the UI. It may be ``None`` when the
            number field has been cleared; in that case the current page is
            kept. Values outside the valid range are clamped to the first or
            last page instead of wrapping around or raising.

    Returns:
        The DataFrame of the selected page.
    """
    global cur
    if page is None or not pages:
        # Nothing usable to switch to: keep showing the current page.
        return cur
    index = min(max(int(page), 0), len(pages) - 1)
    cur = pages[index]
    return cur
def getobj():
    """Return a fresh, empty entity record.

    The record has an empty ``word`` list, ``start`` and ``end`` set to -1
    and the entity type ``"O"``. A new dict (with a new list) is built on
    every call, so callers can mutate the result freely.
    """
    blank = dict(word=[], start=-1, end=-1, entity="O")
    return blank
def _decode_entities(tokens, tags):
    """Group per-token tags into entity spans.

    Tags are interpreted by their first character: ``B`` opens an entity,
    ``I``/``M``/``E`` extend the open entity, ``S`` is a single-token entity
    and ``O`` closes whatever is open. Any other tag is ignored. The entity
    type is the text after the last ``-`` of the opening tag.

    A continuation tag that appears while no entity is open starts a new
    entity instead of producing a span without a start position.

    Args:
        tokens: Sequence of string tokens (or a plain string).
        tags: Sequence of string tags, aligned with ``tokens``.

    Returns:
        A list of dicts with keys ``word``, ``start``, ``end`` and ``entity``.
        ``start``/``end`` are character offsets into ``"".join(tokens)``; for
        single-character tokens they equal the token indices.
    """
    # bounds[i] is the character offset at which token i starts.
    bounds = [0]
    for token in tokens:
        bounds.append(bounds[-1] + len(token))

    entities = []
    current = None  # entity under construction, or None when nothing is open

    def close_current():
        nonlocal current
        if current is not None:
            current["word"] = "".join(current["word"])
            entities.append(current)
            current = None

    for i, tag in enumerate(tags):
        kind = tag[:1]
        continues = kind in ("I", "M", "E")
        if continues and current is not None:
            current["word"].append(tokens[i])
            current["end"] = bounds[i + 1]
        elif kind in ("B", "S") or continues:
            # Start of a new entity (an orphan continuation tag counts too).
            close_current()
            current = getobj()
            current["start"] = bounds[i]
            current["end"] = bounds[i + 1]
            current["word"].append(tokens[i])
            current["entity"] = tag.split("-")[-1]
            if kind == "S":
                close_current()
        elif kind == "O":
            close_current()
    # Flush an entity that runs up to the end of the sequence.
    close_current()
    return entities


def showIter(evt: gr.SelectData) -> dict[str, Any]:
    """Build the highlighted-text preview for the selected table row.

    The row index is taken from ``evt.index[0]`` and looked up in the page
    currently shown (module-level ``cur``).

    Args:
        evt: Selection event from the DataFrame component.

    Returns:
        A dict with ``text`` (the joined tokens) and ``entities`` (the decoded
        spans), the structure consumed by the HighlightedText component.
    """
    row = cur.values.tolist()[evt.index[0]]
    # NOTE(review): assumes column 0 holds the tokens and column 1 the integer
    # label ids -- confirm against the dataset schema.
    text, labels = row[0], row[1]
    tags = int2str(list(map(int, labels)))
    return {"text": "".join(text), "entities": _decode_entities(text, tags)}
# UI: a paged table of dataset rows plus a highlighted preview of the row
# that the user clicks.
# NOTE(review): the original indentation of this layout was lost; the nesting
# below (table and page selector in a column, preview beside it) is a
# reconstruction -- confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            output = gr.DataFrame(value=cur)
            # Start on page 0 (the page shown in the table) and only accept
            # whole page numbers.
            page = gr.Number(
                value=0,
                precision=0,
                minimum=0,
                maximum=max(len(pages) - 1, 0),
                label="page",
            )
            page.change(show, page, outputs=output)
        text = gr.HighlightedText(label="preview")
        # The handler reads the clicked cell from the event itself, so it
        # needs no component inputs.
        output.select(showIter, inputs=[], outputs=[text])
demo.launch()