leandro committed on
Commit
291da6b
•
1 Parent(s): 699221d

add data/code

Browse files
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import os
4
+
5
# Every entry in ./samples is expected to be named "<source>_sample"; the part
# before the first underscore is used as the data-source name in the UI.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# dropdown order (and its default entry) stable between runs.
dataset_names = sorted(name.split("_")[0] for name in os.listdir("./samples"))

# Maps each data-source name to the "train" split loaded from its sample file.
datasets = {}
for name in dataset_names:
    data_file = f"samples/{name}_sample/00000.jsonl.gz"
    # Log exactly the path that is handed to load_dataset below.
    print(data_file)
    datasets[name] = load_dataset("json", data_files=data_file, split="train")
12
+
13
def get_sample(source, sample_id):
    """Return the text of one sample wrapped in a Markdown code fence.

    Args:
        source: Key into the module-level ``datasets`` mapping.
        sample_id: Row index of the sample. UI numeric inputs may deliver
            the value as a float, so it is converted to ``int`` before
            it is used as an index.

    Returns:
        The sample's ``"text"`` field placed between two six-backtick
        fence lines.

    Raises:
        KeyError: If ``source`` is not a key of ``datasets``.
        IndexError: If ``sample_id`` is outside the selected dataset.
    """
    text = datasets[source][int(sample_id)]["text"]
    return f"``````\n{text}\n``````"
15
+
16
# Build the inspection UI: a data-source dropdown and a sample-id selector on
# the first row, and the selected sample rendered as Markdown on the second.
with gr.Blocks() as demo:
    gr.Markdown("# Text Data Inspection 👀")
    with gr.Row():
        data_source = gr.Dropdown(dataset_names, label="Data source:", value=dataset_names[0])
        # The positional arguments are (minimum, maximum, value), which is the
        # gr.Slider signature; gr.Number only accepts `value` positionally.
        sample = gr.Slider(0, 900, 0, step=1, label="Sample ID:")
    with gr.Row():
        # Show the first sample of the first data source on initial load.
        md = gr.Markdown(get_sample(dataset_names[0], 0))

    # Re-render the sample whenever either control changes.
    sample.change(get_sample, inputs=[data_source, sample], outputs=[md])
    data_source.change(get_sample, inputs=[data_source, sample], outputs=[md])

demo.launch()
samples/arxiv_sample/00000.jsonl.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0814f72be9ba2eb21223002d6c45f18e6bc1efbe27ab2aec75da95a6c2480fa8
3
+ size 16105196
samples/owm_sample/00000.jsonl.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8ce10619131942840adb4ca76680834e86bd84fc8441f99a031892419c3f2c
3
+ size 3005231
samples/stack_sample/00000.jsonl.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b4d26a428598a5ed19732ffb175a9babd13b5bd4485fc16a58fdce318ede02
3
+ size 7484894
samples/stackoverflow_sample/00000.jsonl.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cfeac647e3bfd0fbe3c3b6a89e2ad6f68be8a98b0902c849918acfa3b47874d
3
+ size 1320690
samples/wiki_sample/00000.jsonl.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59c7a2e36f246447630f6ac394d436758ab678518845bfc7ecdd48cfdc7c0912
3
+ size 2999516