Spaces:

asoria
/

auto-dataset-analyst-creator

Sleeping

App Files Files Community

asoria HF staff committed on Aug 2

Commit

ca1279e

•

1 Parent(s): 7af3e0d

add dataset viewer compatible libraries code

Browse files

Files changed (2) hide show

app.py +31 -8
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,28 +2,40 @@ import gradio as gr
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 import nbformat as nbf
 from huggingface_hub import HfApi
 """
 TODOs:
 - Handle errors
 - Add more commands to the notebook
 - Parametrize the commands (Move to another file)
-- How to handle configs and splits?
 - Let user choose the framework
 - Improve logs
 """
-def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
     nb = nbf.v4.new_notebook()
     nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
     with open(notebook_name, "w") as f:
         nbf.write(nb, f)
-    print(f"Notebook '{notebook_name}' created successfully.")
 def push_notebook(file_path, dataset_id, token):
     notebook_name = "dataset_analysis.ipynb"
@@ -34,7 +46,6 @@ def push_notebook(file_path, dataset_id, token):
         repo_id=dataset_id,
         repo_type="dataset",
     )
-    print("Notebook uploaded to Huggingface Hub.")
     link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
     html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
@@ -42,10 +53,22 @@ def push_notebook(file_path, dataset_id, token):
 def generate_notebook(dataset_id):
     commands = [
         "!pip install pandas",
-        "import pandas as pd",
-        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
         "df.head()",
     ]
     notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"

 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 import nbformat as nbf
 from huggingface_hub import HfApi
+from httpx import Client
 """
 TODOs:
 - Handle errors
 - Add more commands to the notebook
 - Parametrize the commands (Move to another file)
+- How to handle configs and splits? -> Got from /compatible-libraries
 - Let user choose the framework
+- Use an LLM to suggest commands
+- Add commands for auto training
 - Improve logs
+- Enable 'generate notebook' button only if dataset is available and supports library
 """
+BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
+headers = {"Accept": "application/json", "Content-Type": "application/json"}
+client = Client(headers=headers)
+def get_compatible_libraries(dataset: str):
+    resp = client.get(
+        f"{BASE_DATASETS_SERVER_URL}/compatible-libraries?dataset={dataset}"
+    )
+    return resp.json()
+def create_notebook_file(cell_commands, notebook_name):
     nb = nbf.v4.new_notebook()
     nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
     with open(notebook_name, "w") as f:
         nbf.write(nb, f)
 def push_notebook(file_path, dataset_id, token):
     notebook_name = "dataset_analysis.ipynb"
         repo_id=dataset_id,
         repo_type="dataset",
     )
     link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
     html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
 def generate_notebook(dataset_id):
+    first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
+    try:
+        libraries = get_compatible_libraries(dataset_id)["libraries"]
+    except Exception as err:
+        print(f"Error: {err}")
+        return gr.File(visible=False), gr.Row.update(visible=False)
+    if pandas_library := next(
+        (element for element in libraries if element["library"] == "pandas"), None
+    ):
+        first_code = pandas_library["loading_codes"][0]["code"]
+    else:
+        return gr.File(visible=False), gr.Row.update(visible=False)
     commands = [
         "!pip install pandas",
+        first_code,
         "df.head()",
     ]
     notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio_huggingfacehub_search==0.0.7
 huggingface_hub
-nbformat

 gradio_huggingfacehub_search==0.0.7
 huggingface_hub
+nbformat
+httpx