File size: 2,514 Bytes
418a37b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess

def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies that ``findpydeps`` reports for a dataset script.

    The script file name is derived from the last ``/``-separated component of
    ``dataset`` plus ``.py`` (``"org/name"`` -> ``"name.py"``). That file is
    fetched from the ``dataset`` repository with ``hf_hub_download`` and passed
    to the ``findpydeps`` command-line tool.

    Args:
        dataset: Dataset repository id, e.g. ``"mnist"`` or ``"org/name"``.

    Returns:
        The set of non-blank lines printed on stdout by
        ``findpydeps -i <script> --no-header``, with surrounding whitespace
        removed.

    Raises:
        RuntimeError: If ``findpydeps`` exits with a non-zero status.
        Exception: Anything raised by ``hf_hub_download`` or by launching the
            subprocess is propagated unchanged.
    """
    script_name = dataset.split("/")[-1] + ".py"
    input_file = hf_hub_download(repo_id=dataset, filename=script_name, repo_type="dataset")
    result = subprocess.run(
        ["findpydeps", "-i", input_file, "--no-header"],
        capture_output=True,
        text=True,
    )
    # Without this check a failed run is indistinguishable from a script that
    # simply has no dependencies (both yield an empty stdout).
    if result.returncode != 0:
        raise RuntimeError(
            f"findpydeps exited with status {result.returncode}: {result.stderr.strip()}"
        )
    # Strip each line so trailing "\r" or padding never produces a bogus entry.
    return {dep for line in result.stdout.splitlines() if (dep := line.strip())}

def update(datasets: str) -> str:
    """Collect the dependencies of every dataset listed in ``datasets``.

    Args:
        datasets: Newline-separated dataset names. Surrounding whitespace on
            each line is ignored, blank lines are skipped, and a name that
            appears more than once is processed only once. ``None`` or an
            empty string yields an empty result.

    Returns:
        The union of all dependencies found, sorted and joined with ``"\\n"``.
        Datasets whose processing raises are reported on stdout and skipped,
        so one failing dataset does not discard the results of the others.
    """
    if not datasets:
        return ""

    all_dependencies = set()
    # dict.fromkeys de-duplicates while keeping the order the user typed.
    names = dict.fromkeys(line.strip() for line in datasets.split("\n"))
    for dataset in names:
        if not dataset:
            continue
        # Printed only after the blank-line check so empty lines do not clutter the log.
        print(dataset)
        try:
            dependencies = get_dataset_dependencies(dataset)
        except Exception as e:
            # Best-effort by design: log the failure and move on to the next dataset.
            print(f"Error processing {dataset}: {e}")
            continue
        print(f"Dependencies for {dataset} processed: {len(dependencies)}")
        all_dependencies.update(dependencies)
    return "\n".join(sorted(all_dependencies))

# Heading and instructions rendered at the top of the page.
INTRO_MARKDOWN = """# Script-based dataset dependencies

Paste a list of newline-separated dataset names, and then click **Run** to see the list of dependencies in their scripts.
"""

# Largest clickable example: one dataset name per line.
LONG_DATASET_LIST = """espnet/yodas
gaia-benchmark/GAIA
google/fleurs
mozilla-foundation/common_voice_1_0
mozilla-foundation/common_voice_10_0
mozilla-foundation/common_voice_11_0
mozilla-foundation/common_voice_12_0
mozilla-foundation/common_voice_13_0
mozilla-foundation/common_voice_14_0
mozilla-foundation/common_voice_15_0
mozilla-foundation/common_voice_16_0
mozilla-foundation/common_voice_16_1
mozilla-foundation/common_voice_2_0
mozilla-foundation/common_voice_3_0
mozilla-foundation/common_voice_4_0
mozilla-foundation/common_voice_5_0
mozilla-foundation/common_voice_5_1
mozilla-foundation/common_voice_6_0
mozilla-foundation/common_voice_6_1
mozilla-foundation/common_voice_7_0
mozilla-foundation/common_voice_8_0
mozilla-foundation/common_voice_9_0
poloclub/diffusiondb
pufanyi/MIMICIT
speechcolab/gigaspeech
togethercomputer/RedPajama-Data-1T
togethercomputer/RedPajama-Data-V2
"""

# Each entry is one complete value for the "Datasets" textbox.
EXAMPLE_INPUTS = ["mnist\ncifar10", "mnist", LONG_DATASET_LIST]

with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # Input and output boxes are created inside the same Row.
    with gr.Row():
        datasets_box = gr.Textbox(
            placeholder="mnist\ncifar10",
            label="Datasets",
            lines=10,
            max_lines=10,
        )
        dependencies_box = gr.Textbox(
            label="Dependencies",
            lines=10,
            max_lines=10,
            show_copy_button=True,
        )

    run_button = gr.Button("Run")
    gr.Examples(examples=EXAMPLE_INPUTS, inputs=datasets_box, label="Example Datasets")

    # Clicking "Run" feeds the textbox content to `update` and shows its result.
    run_button.click(fn=update, inputs=datasets_box, outputs=dependencies_box)

demo.launch()