augray committed
Commit 53b44b3 • 1 Parent(s): 57f2e80

visual tweaks

Files changed (2):
  1. README.md +5 -3
  2. app.py +25 -10
README.md CHANGED
@@ -1,13 +1,15 @@
  ---
  title: Text To SQL Hub Datasets
- emoji: 🥠 🦙 🤗
+ emoji: 🥠 🔍 🤗
  colorFrom: blue
  colorTo: blue
  sdk: gradio
  sdk_version: 4.44.0
  app_file: app.py
- pinned: true
+ pinned: false
  license: apache-2.0
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ A simple space to use an LLM to iteratively refine SQL queries for HuggingFace datasets.
+ A fork of
+ [davidberenstein1957/text-to-sql-hub-datasets](https://huggingface.co/spaces/davidberenstein1957/text-to-sql-hub-datasets)
app.py CHANGED
@@ -7,12 +7,26 @@ from typing import Any
  import gradio as gr
  import requests
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
- from huggingface_hub.repocard import CardData, RepoCard
 
 
  logger = logging.getLogger(__name__)
  example = HuggingfaceHubSearch().example_value()
 
+ HEADER_CONTENT = "# 🤗 Dataset DuckDB Query Chatbot"
+ ABOUT_CONTENT = """
+ This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
+ It uses [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)
+ via [together.ai](https://together.ai).
+ Also, it uses the
+ [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
+
+ Query history is saved and given to the chat model so you can chat to refine your query as you go.
+
+ When the DuckDB modal is presented, you may need to click on the name of the
+ config/split at the base of the modal to get the table loaded for DuckDB's use.
+
+ Search for and select a dataset to begin.
+ """
 
  SYSTEM_PROMPT_TEMPLATE = (
      "You are a SQL query expert assistant that returns a DuckDB SQL queries "
@@ -119,7 +133,11 @@ def get_split_choices(card_data: dict[str, Any]) -> list[str]:
 
  def query_dataset(hub_repo_id, card_data, query, config, split, history):
      if card_data is None or len(card_data) == 0:
-         return "", get_iframe(hub_repo_id), []
+         if hub_repo_id:
+             iframe = get_iframe(hub_repo_id)
+         else:
+             iframe = "<p>No dataset selected.</p>"
+         return "", iframe, [], ""
      card_data = json.loads(card_data)
      system_prompt = get_system_prompt(card_data, config, split)
      messages = [{"role": "system", "content": system_prompt}]
@@ -166,7 +184,7 @@ def query_dataset(hub_repo_id, card_data, query, config, split, history):
      duck_query = response_dict["choices"][0]["message"]["content"]
      duck_query = _sanitize_duck_query(duck_query)
      history.append((query, duck_query))
-     return duck_query, get_iframe(hub_repo_id, duck_query), history
+     return duck_query, get_iframe(hub_repo_id, duck_query), history, ""
 
 
  def _sanitize_duck_query(duck_query: str) -> str:
@@ -186,12 +204,9 @@ def _sanitize_duck_query(duck_query: str) -> str:
 
 
  with gr.Blocks() as demo:
-     gr.Markdown("""# 🥠 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙 🥠
-
-     This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
-     It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
-     Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
-     """)
+     gr.Markdown(HEADER_CONTENT)
+     with gr.Accordion("About/Help", open=False):
+         gr.Markdown(ABOUT_CONTENT)
      with gr.Row():
          search_in = HuggingfaceHubSearch(
              label="Search Huggingface Hub",
@@ -269,7 +284,7 @@ with gr.Blocks() as demo:
              split_selection,
              chatbot,
          ],
-         outputs=[sql_out, search_out, chatbot, query],
      )
      gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])
 
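For reference, a minimal, self-contained sketch (not the Space's actual app.py) of the two UI behaviors this commit introduces: the header moves into a `gr.Markdown` plus an "About/Help" `gr.Accordion`, and the query textbox is added to the event's `outputs` so that returning `""` clears it after each submission. The `fake_query_dataset` helper, the "Results" accordion, and the component labels below are stand-ins invented for this example; the real app wires these outputs to `query_dataset` and the dataset-viewer iframe.

```python
import gradio as gr

HEADER_CONTENT = "# 🤗 Dataset DuckDB Query Chatbot"
ABOUT_CONTENT = "Type a question and the generated DuckDB SQL appears below."  # placeholder text


def fake_query_dataset(query, history):
    # Stand-in for the real query_dataset(): pretend an LLM produced a DuckDB query.
    duck_query = f"SELECT * FROM train LIMIT 10 -- for: {query}"
    history = (history or []) + [(query, duck_query)]
    # The final "" maps to the query Textbox in `outputs`, clearing it after submit --
    # the same effect as adding `query` to the outputs list in this commit.
    return duck_query, history, ""


with gr.Blocks() as demo:
    gr.Markdown(HEADER_CONTENT)
    with gr.Accordion("About/Help", open=False):
        gr.Markdown(ABOUT_CONTENT)
    query = gr.Textbox(label="Ask a question about the dataset")
    query_btn = gr.Button("Run query")
    with gr.Accordion("Results", open=False) as accordion:  # stand-in for the app's existing accordion
        sql_out = gr.Textbox(label="DuckDB SQL")
        chatbot = gr.Chatbot(label="Query history")
    query_btn.click(
        fn=fake_query_dataset,
        inputs=[query, chatbot],
        outputs=[sql_out, chatbot, query],
    )
    # Same pattern the app uses: pop the results accordion open whenever a query runs.
    gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])

if __name__ == "__main__":
    demo.launch()
```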