augray
committed on
Commit
•
53b44b3
1
Parent(s):
57f2e80
visual tweaks
Browse files
README.md
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
---
|
2 |
title: Text To SQL Hub Datasets
|
3 |
-
emoji: π₯
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
1 |
---
|
2 |
title: Text To SQL Hub Datasets
|
3 |
+
emoji: π₯ π π€
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
app_file: app.py
|
9 |
+
pinned: false
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
A simple space to use an LLM to iteratively refine SQL queries for HuggingFace datasets.
|
14 |
+
A fork of
|
15 |
+
[davidberenstein1957/text-to-sql-hub-datasets](https://huggingface.co/spaces/davidberenstein1957/text-to-sql-hub-datasets)
|
app.py
CHANGED
@@ -7,12 +7,26 @@ from typing import Any
|
|
7 |
import gradio as gr
|
8 |
import requests
|
9 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
10 |
-
from huggingface_hub.repocard import CardData, RepoCard
|
11 |
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
example = HuggingfaceHubSearch().example_value()
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
SYSTEM_PROMPT_TEMPLATE = (
|
18 |
"You are a SQL query expert assistant that returns a DuckDB SQL queries "
|
@@ -119,7 +133,11 @@ def get_split_choices(card_data: dict[str, Any]) -> list[str]:
|
|
119 |
|
120 |
def query_dataset(hub_repo_id, card_data, query, config, split, history):
|
121 |
if card_data is None or len(card_data) == 0:
|
122 |
-
|
|
|
|
|
|
|
|
|
123 |
card_data = json.loads(card_data)
|
124 |
system_prompt = get_system_prompt(card_data, config, split)
|
125 |
messages = [{"role": "system", "content": system_prompt}]
|
@@ -166,7 +184,7 @@ def query_dataset(hub_repo_id, card_data, query, config, split, history):
|
|
166 |
duck_query = response_dict["choices"][0]["message"]["content"]
|
167 |
duck_query = _sanitize_duck_query(duck_query)
|
168 |
history.append((query, duck_query))
|
169 |
-
return duck_query, get_iframe(hub_repo_id, duck_query), history
|
170 |
|
171 |
|
172 |
def _sanitize_duck_query(duck_query: str) -> str:
|
@@ -186,12 +204,9 @@ def _sanitize_duck_query(duck_query: str) -> str:
|
|
186 |
|
187 |
|
188 |
with gr.Blocks() as demo:
|
189 |
-
gr.Markdown(
|
190 |
-
|
191 |
-
|
192 |
-
It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
|
193 |
-
Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
|
194 |
-
""")
|
195 |
with gr.Row():
|
196 |
search_in = HuggingfaceHubSearch(
|
197 |
label="Search Huggingface Hub",
|
@@ -269,7 +284,7 @@ with gr.Blocks() as demo:
|
|
269 |
split_selection,
|
270 |
chatbot,
|
271 |
],
|
272 |
-
outputs=[sql_out, search_out, chatbot],
|
273 |
)
|
274 |
gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])
|
275 |
|
|
|
7 |
import gradio as gr
|
8 |
import requests
|
9 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
|
|
10 |
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
example = HuggingfaceHubSearch().example_value()
|
14 |
|
15 |
+
HEADER_CONTENT = "# π€ Dataset DuckDB Query Chatbot"
|
16 |
+
ABOUT_CONTENT = """
|
17 |
+
This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
|
18 |
+
It uses [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
|
19 |
+
via [together.ai](https://together.ai)
|
20 |
+
Also, it uses the
|
21 |
+
[dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
|
22 |
+
|
23 |
+
Query history is saved and given to the chat model so you can chat to refine your query as you go.
|
24 |
+
|
25 |
+
When the DuckDB modal is presented, you may need to click on the name of the
|
26 |
+
config/split at the base of the modal to get the table loaded for DuckDB's use.
|
27 |
+
|
28 |
+
Search for and select a dataset to begin.
|
29 |
+
"""
|
30 |
|
31 |
SYSTEM_PROMPT_TEMPLATE = (
|
32 |
"You are a SQL query expert assistant that returns a DuckDB SQL queries "
|
|
|
133 |
|
134 |
def query_dataset(hub_repo_id, card_data, query, config, split, history):
|
135 |
if card_data is None or len(card_data) == 0:
|
136 |
+
if hub_repo_id:
|
137 |
+
iframe = get_iframe(hub_repo_id)
|
138 |
+
else:
|
139 |
+
iframe = "<p>No dataset selected.</p>"
|
140 |
+
return "", iframe, [], ""
|
141 |
card_data = json.loads(card_data)
|
142 |
system_prompt = get_system_prompt(card_data, config, split)
|
143 |
messages = [{"role": "system", "content": system_prompt}]
|
|
|
184 |
duck_query = response_dict["choices"][0]["message"]["content"]
|
185 |
duck_query = _sanitize_duck_query(duck_query)
|
186 |
history.append((query, duck_query))
|
187 |
+
return duck_query, get_iframe(hub_repo_id, duck_query), history, ""
|
188 |
|
189 |
|
190 |
def _sanitize_duck_query(duck_query: str) -> str:
|
|
|
204 |
|
205 |
|
206 |
with gr.Blocks() as demo:
|
207 |
+
gr.Markdown(HEADER_CONTENT)
|
208 |
+
with gr.Accordion("About/Help", open=False):
|
209 |
+
gr.Markdown(ABOUT_CONTENT)
|
|
|
|
|
|
|
210 |
with gr.Row():
|
211 |
search_in = HuggingfaceHubSearch(
|
212 |
label="Search Huggingface Hub",
|
|
|
284 |
split_selection,
|
285 |
chatbot,
|
286 |
],
|
287 |
+
outputs=[sql_out, search_out, chatbot, query],
|
288 |
)
|
289 |
gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])
|
290 |
|