File size: 973 Bytes
291da6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from datasets import load_dataset
import os

# Every dataset lives in a directory named "<dataset>_sample" under ./samples
# and is read from the shard "00000.jsonl.gz" inside it.
_SAMPLE_SUFFIX = "_sample"

# Strip only the trailing "_sample" so dataset names that themselves contain
# underscores stay intact, skip entries that do not follow the naming scheme,
# and sort because os.listdir() returns entries in arbitrary order (which
# would otherwise make the default dropdown choice nondeterministic).
dataset_names = sorted(
    entry[: -len(_SAMPLE_SUFFIX)]
    for entry in os.listdir("./samples")
    if entry.endswith(_SAMPLE_SUFFIX) and len(entry) > len(_SAMPLE_SUFFIX)
)

# Maps dataset name -> the "train" split loaded from its sample shard.
datasets = {}
for name in dataset_names:
    sample_path = f"samples/{name}{_SAMPLE_SUFFIX}/00000.jsonl.gz"
    # Log the path that is actually loaded.
    print(sample_path)
    datasets[name] = load_dataset("json", data_files=sample_path, split="train")

def get_sample(source, sample_id):
    """Return the text of one sample wrapped in a Markdown code fence.

    Args:
        source: Key into the module-level ``datasets`` mapping.
        sample_id: Row index of the sample. May arrive as a float (numeric
            UI inputs commonly deliver floats) or as ``None`` when the input
            field is empty.

    Returns:
        The sample's ``text`` field surrounded by six-backtick fences, or a
        short human-readable message when no valid sample ID was given.
    """
    if sample_id is None:
        return "Please enter a sample ID."

    dataset = datasets[source]
    # Row lookup needs a real int; a float such as 3.0 is not a valid index.
    index = int(sample_id)
    size = len(dataset)
    if not 0 <= index < size:
        return f"Sample ID {index} is out of range: `{source}` has {size} samples."

    return f"``````\n{dataset[index]['text']}\n``````"

# Build the inspection UI: a dropdown to pick the data source, a numeric
# field to pick the sample, and a Markdown pane showing the sample's text.
with gr.Blocks() as demo:
    gr.Markdown("# Text Data Inspection 👀")
    with gr.Row():
        data_source = gr.Dropdown(dataset_names, label="Data source:", value=dataset_names[0])
        # gr.Number accepts only `value` positionally; the previous
        # positional (0, 900, 0) matched gr.Slider's (minimum, maximum, value)
        # order and raised a TypeError. precision=0 makes the component hand
        # an int to the callbacks.
        # NOTE(review): 900 is kept from the original as the upper bound;
        # confirm it matches the number of rows in each sample file.
        sample = gr.Number(value=0, minimum=0, maximum=900, step=1, precision=0, label="Sample ID:")
    with gr.Row():
        # Show the first sample of the default data source on load.
        md = gr.Markdown(get_sample(dataset_names[0], 0))

    # Re-render the sample whenever either input changes.
    sample.change(get_sample, inputs=[data_source, sample], outputs=[md])
    data_source.change(get_sample, inputs=[data_source, sample], outputs=[md])

demo.launch()