"""Gradio app listing the Python dependencies of script-based Hub datasets."""

import subprocess

import gradio as gr
from huggingface_hub import hf_hub_download


def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies reported by ``findpydeps`` for a dataset script.

    The loading script is looked up in the dataset repository under the name
    ``<last path segment of the repo id>.py`` (e.g. ``google/fleurs`` ->
    ``fleurs.py``), downloaded with ``hf_hub_download`` and passed to the
    ``findpydeps`` command-line tool.

    Args:
        dataset: Dataset repository id, with or without a namespace.

    Returns:
        The distinct non-blank lines printed by ``findpydeps`` on stdout
        (presumably one dependency name per line).

    Raises:
        RuntimeError: If ``findpydeps`` exits with a non-zero status.
        Exception: Whatever ``hf_hub_download`` or ``subprocess.run`` raise
            (missing repo/script, ``findpydeps`` not installed, ...).
    """
    script_name = dataset.split("/")[-1] + ".py"
    script_path = hf_hub_download(
        repo_id=dataset, filename=script_name, repo_type="dataset"
    )
    # List-form argv (no shell), so the path is never interpreted by a shell.
    result = subprocess.run(
        ["findpydeps", "-i", script_path, "--no-header"],
        capture_output=True,
        text=True,
    )
    # A failed analysis must not be reported as "zero dependencies".
    if result.returncode != 0:
        raise RuntimeError(
            f"findpydeps exited with status {result.returncode}: "
            f"{result.stderr.strip()}"
        )
    return {line.strip() for line in result.stdout.splitlines() if line.strip()}


def update(datasets: str) -> str:
    """Collect the dependencies of every dataset named in ``datasets``.

    Args:
        datasets: Newline-separated dataset repository ids. Blank lines and
            surrounding whitespace are ignored.

    Returns:
        The union of all dependencies found, sorted and joined with newlines.
        Datasets that fail to process are logged to stdout and skipped, so a
        single bad entry does not abort the whole batch.
    """
    all_dependencies: set[str] = set()
    for line in (datasets or "").splitlines():
        dataset = line.strip()
        if not dataset:
            continue
        print(dataset)
        try:
            dependencies = get_dataset_dependencies(dataset)
        except Exception as e:
            # Deliberate best-effort boundary: log and move on to the next one.
            print(f"Error processing {dataset}: {e}")
            continue
        print(f"Dependencies for {dataset} processed: {len(dependencies)}")
        all_dependencies.update(dependencies)
    return "\n".join(sorted(all_dependencies))


# Dataset ids for the large example; joined with newlines below because
# ``update`` expects exactly one dataset per line.
_SCRIPT_DATASETS_EXAMPLE = (
    "espnet/yodas",
    "gaia-benchmark/GAIA",
    "google/fleurs",
    "mozilla-foundation/common_voice_1_0",
    "mozilla-foundation/common_voice_10_0",
    "mozilla-foundation/common_voice_11_0",
    "mozilla-foundation/common_voice_12_0",
    "mozilla-foundation/common_voice_13_0",
    "mozilla-foundation/common_voice_14_0",
    "mozilla-foundation/common_voice_15_0",
    "mozilla-foundation/common_voice_16_0",
    "mozilla-foundation/common_voice_16_1",
    "mozilla-foundation/common_voice_2_0",
    "mozilla-foundation/common_voice_3_0",
    "mozilla-foundation/common_voice_4_0",
    "mozilla-foundation/common_voice_5_0",
    "mozilla-foundation/common_voice_5_1",
    "mozilla-foundation/common_voice_6_0",
    "mozilla-foundation/common_voice_6_1",
    "mozilla-foundation/common_voice_7_0",
    "mozilla-foundation/common_voice_8_0",
    "mozilla-foundation/common_voice_9_0",
    "poloclub/diffusiondb",
    "pufanyi/MIMICIT",
    "speechcolab/gigaspeech",
    "togethercomputer/RedPajama-Data-1T",
    "togethercomputer/RedPajama-Data-V2",
)


with gr.Blocks() as demo:
    # The blank line after the heading keeps the paragraph out of the <h1>.
    gr.Markdown(
        "# Script-based dataset dependencies\n"
        "\n"
        "Paste a list of newline-separated dataset names, and then click "
        "**Run** to see the list of dependencies in their scripts.\n"
    )
    with gr.Row():
        inp = gr.Textbox(
            placeholder="mnist\ncifar10",
            label="Datasets",
            lines=10,
            max_lines=10,
        )
        out = gr.Textbox(
            label="Dependencies",
            lines=10,
            max_lines=10,
            show_copy_button=True,
        )
    btn = gr.Button("Run")
    examples = [
        "mnist\ncifar10",
        "mnist",
        "\n".join(_SCRIPT_DATASETS_EXAMPLE) + "\n",
    ]
    gr.Examples(examples, inp, label="Example Datasets")
    btn.click(fn=update, inputs=inp, outputs=out)

demo.launch()