Spaces:
Sleeping
Sleeping
add dataset viewer compatible libraries code
Browse files- app.py +31 -8
- requirements.txt +2 -1
app.py
CHANGED
@@ -2,28 +2,40 @@ import gradio as gr
|
|
2 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
3 |
import nbformat as nbf
|
4 |
from huggingface_hub import HfApi
|
5 |
-
|
6 |
|
7 |
"""
|
8 |
TODOs:
|
9 |
- Handle errors
|
10 |
- Add more commands to the notebook
|
11 |
- Parametrize the commands (Move to another file)
|
12 |
-
- How to handle configs and splits?
|
13 |
- Let user choose the framework
|
|
|
|
|
14 |
- Improve logs
|
|
|
15 |
"""
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
|
|
19 |
nb = nbf.v4.new_notebook()
|
20 |
nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
|
21 |
|
22 |
with open(notebook_name, "w") as f:
|
23 |
nbf.write(nb, f)
|
24 |
|
25 |
-
print(f"Notebook '{notebook_name}' created successfully.")
|
26 |
-
|
27 |
|
28 |
def push_notebook(file_path, dataset_id, token):
|
29 |
notebook_name = "dataset_analysis.ipynb"
|
@@ -34,7 +46,6 @@ def push_notebook(file_path, dataset_id, token):
|
|
34 |
repo_id=dataset_id,
|
35 |
repo_type="dataset",
|
36 |
)
|
37 |
-
print("Notebook uploaded to Huggingface Hub.")
|
38 |
link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
|
39 |
html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
|
40 |
|
@@ -42,10 +53,22 @@ def push_notebook(file_path, dataset_id, token):
|
|
42 |
|
43 |
|
44 |
def generate_notebook(dataset_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
commands = [
|
46 |
"!pip install pandas",
|
47 |
-
|
48 |
-
f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
|
49 |
"df.head()",
|
50 |
]
|
51 |
notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
|
|
|
2 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
3 |
import nbformat as nbf
|
4 |
from huggingface_hub import HfApi
|
5 |
+
from httpx import Client
|
6 |
|
7 |
"""
|
8 |
TODOs:
|
9 |
- Handle errors
|
10 |
- Add more commands to the notebook
|
11 |
- Parametrize the commands (Move to another file)
|
12 |
+
- How to handle configs and splits? -> Got from /compatible-libraries
|
13 |
- Let user choose the framework
|
14 |
+
- Use an LLM to suggest commands
|
15 |
+
- Add commands for auto training
|
16 |
- Improve logs
|
17 |
+
- Enable 'generate notebook' button only if dataset is available and supports library
|
18 |
"""
|
19 |
|
20 |
+
BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
|
21 |
+
headers = {"Accept": "application/json", "Content-Type": "application/json"}
|
22 |
+
client = Client(headers=headers)
|
23 |
+
|
24 |
+
|
25 |
+
def get_compatible_libraries(dataset: str):
    """Fetch the compatible-libraries description of a dataset.

    Sends a GET request to the ``/compatible-libraries`` endpoint of the
    datasets server using the module-level ``client``.

    Args:
        dataset: Dataset identifier, passed as the ``dataset`` query
            parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        httpx.RequestError: If the request itself fails.
    """
    # Pass the id through ``params`` so httpx URL-encodes it instead of
    # splicing the raw string into the query.
    resp = client.get(
        f"{BASE_DATASETS_SERVER_URL}/compatible-libraries",
        params={"dataset": dataset},
    )
    # Surface HTTP errors here rather than returning an error payload that
    # callers would mistake for a valid result.
    resp.raise_for_status()
    return resp.json()
|
30 |
|
31 |
+
|
32 |
+
def create_notebook_file(cell_commands, notebook_name):
    """Write a notebook containing one code cell per command.

    Args:
        cell_commands: Iterable of source strings; each becomes the source
            of one code cell, in order.
        notebook_name: Path of the notebook file to create or overwrite.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Notebook files are JSON text; pin UTF-8 so the output does not depend
    # on the platform's default encoding.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)
|
38 |
|
|
|
|
|
39 |
|
40 |
def push_notebook(file_path, dataset_id, token):
|
41 |
notebook_name = "dataset_analysis.ipynb"
|
|
|
46 |
repo_id=dataset_id,
|
47 |
repo_type="dataset",
|
48 |
)
|
|
|
49 |
link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
|
50 |
html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
|
51 |
|
|
|
53 |
|
54 |
|
55 |
def generate_notebook(dataset_id):
|
56 |
+
first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
|
57 |
+
try:
|
58 |
+
libraries = get_compatible_libraries(dataset_id)["libraries"]
|
59 |
+
except Exception as err:
|
60 |
+
print(f"Error: {err}")
|
61 |
+
return gr.File(visible=False), gr.Row.update(visible=False)
|
62 |
+
if pandas_library := next(
|
63 |
+
(element for element in libraries if element["library"] == "pandas"), None
|
64 |
+
):
|
65 |
+
first_code = pandas_library["loading_codes"][0]["code"]
|
66 |
+
else:
|
67 |
+
return gr.File(visible=False), gr.Row.update(visible=False)
|
68 |
+
|
69 |
commands = [
|
70 |
"!pip install pandas",
|
71 |
+
first_code,
|
|
|
72 |
"df.head()",
|
73 |
]
|
74 |
notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
gradio_huggingfacehub_search==0.0.7
|
2 |
huggingface_hub
|
3 |
-
nbformat
|
|
|
|
1 |
gradio_huggingfacehub_search==0.0.7
|
2 |
huggingface_hub
|
3 |
+
nbformat
|
4 |
+
httpx
|