Spaces:
Running
Running
Render notebook as HTML instead
Browse files
- app.py +23 -33
- requirements.txt +2 -1
app.py
CHANGED
@@ -13,6 +13,7 @@ from utils.notebook_utils import (
|
|
13 |
)
|
14 |
from dotenv import load_dotenv
|
15 |
import os
|
|
|
16 |
|
17 |
# TODOs:
|
18 |
# Improve UI code preview
|
@@ -64,6 +65,9 @@ def create_notebook_file(cells, notebook_name):
|
|
64 |
with open(notebook_name, "w") as f:
|
65 |
nbf.write(nb, f)
|
66 |
logging.info(f"Notebook {notebook_name} created successfully")
|
|
|
|
|
|
|
67 |
|
68 |
|
69 |
def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
|
@@ -96,15 +100,15 @@ def longest_string_column(df):
|
|
96 |
|
97 |
|
98 |
def generate_eda_cells(dataset_id):
|
99 |
-
|
100 |
|
101 |
|
102 |
def generate_rag_cells(dataset_id):
|
103 |
-
|
104 |
|
105 |
|
106 |
def generate_embedding_cells(dataset_id):
|
107 |
-
|
108 |
|
109 |
|
110 |
def _push_to_hub(
|
@@ -135,20 +139,18 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
|
|
135 |
except Exception as err:
|
136 |
gr.Error("Unable to retrieve dataset info from HF Hub.")
|
137 |
logging.error(f"Failed to fetch compatible libraries: {err}")
|
138 |
-
return
|
139 |
|
140 |
if not libraries:
|
141 |
logging.error(f"Dataset not compatible with pandas library - not libraries")
|
142 |
-
|
143 |
-
return
|
144 |
pandas_library = next(
|
145 |
(lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
|
146 |
None,
|
147 |
)
|
148 |
if not pandas_library:
|
149 |
logging.error("Dataset not compatible with pandas library - not pandas library")
|
150 |
-
|
151 |
-
return
|
152 |
first_config_loading_code = pandas_library["loading_codes"][0]
|
153 |
first_code = first_config_loading_code["code"]
|
154 |
first_config = first_config_loading_code["config_name"]
|
@@ -166,48 +168,38 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
|
|
166 |
logging.error(
|
167 |
"Dataset does not have categorical columns, which are required for RAG generation."
|
168 |
)
|
169 |
-
|
170 |
"",
|
171 |
"## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
|
172 |
)
|
173 |
-
return
|
174 |
if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
|
175 |
logging.error(
|
176 |
"Dataset does not have categorical or numeric columns, which are required for EDA generation."
|
177 |
)
|
178 |
-
|
179 |
"",
|
180 |
"## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
|
181 |
)
|
182 |
-
return
|
183 |
|
184 |
cells = replace_wildcards(
|
185 |
cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
|
186 |
)
|
187 |
-
|
188 |
-
# Show only the first 30 lines, would like to have a scroll in gr.Code https://github.com/gradio-app/gradio/issues/9192
|
189 |
-
for cell in cells:
|
190 |
-
if cell["cell_type"] == "markdown":
|
191 |
-
continue
|
192 |
-
generated_text += cell["source"] + "\n\n"
|
193 |
-
yield generated_text, ""
|
194 |
-
if generated_text.count("\n") > 30:
|
195 |
-
generated_text += (
|
196 |
-
f"## See more lines available in the generated notebook 🤗 ......"
|
197 |
-
)
|
198 |
-
yield generated_text, ""
|
199 |
-
break
|
200 |
notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
|
201 |
-
create_notebook_file(cells, notebook_name=notebook_name)
|
202 |
_push_to_hub(dataset_id, notebook_name)
|
203 |
notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
|
204 |
-
|
205 |
-
|
206 |
-
f"##
|
207 |
)
|
208 |
|
209 |
|
210 |
-
with gr.Blocks(
|
|
|
|
|
|
|
|
|
211 |
gr.Markdown("# 🤗 Dataset notebook creator 🕵️")
|
212 |
with gr.Row(equal_height=True):
|
213 |
with gr.Column(scale=2):
|
@@ -262,9 +254,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
262 |
)
|
263 |
|
264 |
with gr.Column(scale=2):
|
265 |
-
code_component = gr.
|
266 |
-
language="python", label="Notebook Code Preview", lines=40
|
267 |
-
)
|
268 |
go_to_notebook = gr.Markdown("", visible=True)
|
269 |
|
270 |
generate_eda_btn.click(
|
|
|
13 |
)
|
14 |
from dotenv import load_dotenv
|
15 |
import os
|
16 |
+
from nbconvert import HTMLExporter
|
17 |
|
18 |
# TODOs:
|
19 |
# Improve UI code preview
|
|
|
65 |
with open(notebook_name, "w") as f:
|
66 |
nbf.write(nb, f)
|
67 |
logging.info(f"Notebook {notebook_name} created successfully")
|
68 |
+
html_exporter = HTMLExporter()
|
69 |
+
html_data, _ = html_exporter.from_notebook_node(nb)
|
70 |
+
return html_data
|
71 |
|
72 |
|
73 |
def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
|
|
|
100 |
|
101 |
|
102 |
def generate_eda_cells(dataset_id):
|
103 |
+
return generate_cells(dataset_id, eda_cells, "eda")
|
104 |
|
105 |
|
106 |
def generate_rag_cells(dataset_id):
|
107 |
+
return generate_cells(dataset_id, rag_cells, "rag")
|
108 |
|
109 |
|
110 |
def generate_embedding_cells(dataset_id):
|
111 |
+
return generate_cells(dataset_id, embeggins_cells, "embeddings")
|
112 |
|
113 |
|
114 |
def _push_to_hub(
|
|
|
139 |
except Exception as err:
|
140 |
gr.Error("Unable to retrieve dataset info from HF Hub.")
|
141 |
logging.error(f"Failed to fetch compatible libraries: {err}")
|
142 |
+
return "", "## ❌ This dataset is not accessible from the Hub ❌"
|
143 |
|
144 |
if not libraries:
|
145 |
logging.error(f"Dataset not compatible with pandas library - not libraries")
|
146 |
+
return "", "## ❌ This dataset is not compatible with pandas library ❌"
|
|
|
147 |
pandas_library = next(
|
148 |
(lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
|
149 |
None,
|
150 |
)
|
151 |
if not pandas_library:
|
152 |
logging.error("Dataset not compatible with pandas library - not pandas library")
|
153 |
+
return "", "## ❌ This dataset is not compatible with pandas library ❌"
|
|
|
154 |
first_config_loading_code = pandas_library["loading_codes"][0]
|
155 |
first_code = first_config_loading_code["code"]
|
156 |
first_config = first_config_loading_code["config_name"]
|
|
|
168 |
logging.error(
|
169 |
"Dataset does not have categorical columns, which are required for RAG generation."
|
170 |
)
|
171 |
+
return (
|
172 |
"",
|
173 |
"## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
|
174 |
)
|
|
|
175 |
if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
|
176 |
logging.error(
|
177 |
"Dataset does not have categorical or numeric columns, which are required for EDA generation."
|
178 |
)
|
179 |
+
return (
|
180 |
"",
|
181 |
"## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
|
182 |
)
|
|
|
183 |
|
184 |
cells = replace_wildcards(
|
185 |
cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
|
186 |
)
|
187 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
|
189 |
+
html_content = create_notebook_file(cells, notebook_name=notebook_name)
|
190 |
_push_to_hub(dataset_id, notebook_name)
|
191 |
notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
|
192 |
+
return (
|
193 |
+
html_content,
|
194 |
+
f"## 🚀 Ready to explore? Play and run the generated notebook 👉 [here]({notebook_link})!",
|
195 |
)
|
196 |
|
197 |
|
198 |
+
with gr.Blocks(
|
199 |
+
fill_height=True,
|
200 |
+
fill_width=True,
|
201 |
+
css="#box { height: 650px; overflow-y: scroll !important}",
|
202 |
+
) as demo:
|
203 |
gr.Markdown("# 🤗 Dataset notebook creator 🕵️")
|
204 |
with gr.Row(equal_height=True):
|
205 |
with gr.Column(scale=2):
|
|
|
254 |
)
|
255 |
|
256 |
with gr.Column(scale=2):
|
257 |
+
code_component = gr.HTML(elem_id="box")
|
|
|
|
|
258 |
go_to_notebook = gr.Markdown("", visible=True)
|
259 |
|
260 |
generate_eda_btn.click(
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ huggingface_hub
|
|
3 |
nbformat
|
4 |
httpx
|
5 |
outlines
|
6 |
-
python-dotenv
|
|
|
|
3 |
nbformat
|
4 |
httpx
|
5 |
outlines
|
6 |
+
python-dotenv
|
7 |
+
nbconvert
|