minskiter's picture
feat(project): update project
f9d052c
import gradio as gr
import pandas as pd
from typing import Any
import datasets
from tqdm import tqdm
from huggingface_hub import login
import os
# Authenticate against the Hugging Face Hub with the token read from the
# HF_Token environment variable (None when the variable is unset).
login(os.environ.get("HF_Token"))

# Open the test split in streaming mode so rows are fetched lazily.
test = datasets.load_dataset(
    "minskiter/weibo",
    split=datasets.Split.TEST,
    streaming=True,
)

# Converts integer class ids of the "labels" feature into their tag names.
int2str = test.features["labels"].feature.int2str

# Number of dataset rows shown per page of the viewer.
page_size = 10

# Every page is a DataFrame holding up to `page_size` consecutive rows.
pages = []

# Walk the stream exactly once and cut it into pages. Re-deriving each page
# with skip(...).take(...) would restart the stream for every page.
_rows = []
with tqdm(desc="load dataset") as bar:
    for _example in test:
        _rows.append(_example)
        if len(_rows) == page_size:
            pages.append(pd.DataFrame(_rows))
            _rows = []
            bar.update(1)  # update() takes an increment: one page was added
    if _rows:
        # Trailing partial page.
        pages.append(pd.DataFrame(_rows))
        bar.update(1)

# Page currently displayed; an empty frame keeps start-up from failing when
# the split contains no rows.
cur = pages[0] if pages else pd.DataFrame()
def show(page: float) -> pd.DataFrame:
    """Select the page to display and return its rows.

    Args:
        page: Zero-based page number as delivered by the number input. It may
            also be ``None`` (cleared field) or a non-finite float.

    Returns:
        The DataFrame for the requested page. When ``page`` cannot be
        converted to an integer or lies outside ``range(len(pages))``, the
        currently selected page is returned unchanged.
    """
    global cur
    try:
        index = int(page)
    except (TypeError, ValueError, OverflowError):
        # None -> TypeError, NaN -> ValueError, +/-inf -> OverflowError.
        return cur
    # Explicit bounds check: a negative index must not wrap around to the end.
    if 0 <= index < len(pages):
        cur = pages[index]
    return cur
def getobj():
    """Return a fresh, empty entity record.

    The record has an empty ``word`` list, ``start`` and ``end`` set to -1,
    and ``entity`` set to ``"O"``. A new dict (and a new list) is built on
    every call, so callers can mutate the result freely.
    """
    blank = dict(word=[], start=-1, end=-1, entity="O")
    return blank
def _decode_entities(tokens, labels: list[str]) -> list[dict[str, Any]]:
    """Group per-token tags into entity spans addressed by character offset.

    Tags are interpreted by their first character: ``B`` opens a span,
    ``I``/``M``/``E`` extend the open span, ``S`` is a single-token span and
    ``O`` is outside any span. The entity type is the part of the tag after
    the last ``-``.

    Args:
        tokens: The tokens of the sentence, in order (strings).
        labels: One tag per token. Surplus tokens or tags are ignored.

    Returns:
        A list of dicts with keys ``word``, ``start``, ``end`` and ``entity``,
        where ``start``/``end`` are offsets into ``"".join(tokens)``.
    """
    entities = []
    span = getobj()
    offset = 0  # character position of the current token in the joined text

    for token, label in zip(tokens, labels):
        kind = label[:1]
        width = len(token)

        if kind in ("B", "S", "O"):
            # These tags terminate whatever span is still open.
            if span["word"]:
                span["word"] = "".join(span["word"])
                entities.append(span)
            span = getobj()

        if kind == "S":
            span["start"] = offset
            span["end"] = offset + width
            span["word"] = token
            span["entity"] = label.split("-")[-1]
            entities.append(span)
            span = getobj()
        elif kind in ("B", "I", "M", "E"):
            if not span["word"]:
                # Either a regular "B", or a continuation tag with nothing
                # open: start a span here instead of emitting one with
                # start == -1 and entity "O".
                span["start"] = offset
                span["entity"] = label.split("-")[-1]
            span["word"].append(token)
            span["end"] = offset + width

        offset += width

    # Flush a span that runs up to the end of the sentence.
    if span["word"]:
        span["word"] = "".join(span["word"])
        entities.append(span)
    return entities


def showIter(evt: gr.SelectData) -> dict[str, Any]:
    """Build the highlighted-text payload for the selected table row.

    Args:
        evt: Selection event of the table; ``evt.index[0]`` is used as the
            row position within the current page.

    Returns:
        A dict with ``text`` (the row's tokens joined into one string) and
        ``entities`` (the decoded spans for that text).
    """
    row = cur.iloc[evt.index[0]].tolist()
    # Column 0 holds the tokens, column 1 the integer tag ids.
    text, labels = row[0], row[1]
    labels = int2str(list(map(int, labels)))
    return {"text": "".join(text), "entities": _decode_entities(text, labels)}
# Assemble the viewer: a table of the current page with a page selector, and
# a highlighted preview of whichever row gets selected.
# NOTE(review): the nesting below was reconstructed from a listing without
# indentation -- confirm the placement of the preview against the deployed app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            output = gr.DataFrame(value=cur)
            page = gr.Number(
                minimum=0,
                maximum=len(pages) - 1,
                label="page",
            )
            # Changing the page number swaps the rows shown in the table.
            page.change(fn=show, inputs=page, outputs=output)
        text = gr.HighlightedText(label="preview")
    # Selecting a table cell renders that row's entities in the preview.
    output.select(fn=showIter, inputs=[], outputs=[text])

demo.launch()