Caleb Fahlgren committed on
Commit
a00be78
·
1 Parent(s): 033af05

add basic querying with duckdb

Browse files
Files changed (2) hide show
  1. app.py +47 -10
  2. requirements.txt +83 -0
app.py CHANGED
@@ -1,14 +1,51 @@
 
 
 
1
  import gradio as gr
2
- import spaces
3
- import torch
4
 
5
- zero = torch.Tensor([0]).cuda()
6
- print(zero.device) # <-- 'cpu' 🤔
7
 
8
- @spaces.GPU
9
- def greet(n):
10
- print(zero.device) # <-- 'cuda:0' 🤗
11
- return f"Hello {zero + n} Tensor"
12
 
13
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
14
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
2
+ from huggingface_hub import HfApi
3
+ import pandas as pd
4
  import gradio as gr
5
+ import duckdb
6
+ import requests
7
 
8
+ BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
 
9
 
10
+ hf_api = HfApi()
11
+ conn = duckdb.connect()
 
 
12
 
13
+
14
+ def query_dataset(dataset_id: str, query: str) -> pd.DataFrame:
15
+ response = requests.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset_id}")
16
+ response.raise_for_status() # Check if the request was successful
17
+
18
+ first_parquet = response.json().get("parquet_files", [])[0]
19
+ first_parquet_url = first_parquet.get("url")
20
+
21
+ if not first_parquet_url:
22
+ raise ValueError("No valid URL found for the first parquet file.")
23
+
24
+ sql_query = f"SELECT * FROM read_parquet('{first_parquet_url}') limit 100;"
25
+
26
+ df = conn.execute(sql_query).fetchdf()
27
+ return df
28
+
29
+
30
+ with gr.Blocks() as demo:
31
+ gr.Markdown("# Query your HF Datasets with Natural Language 📈📊")
32
+ dataset_name = HuggingfaceHubSearch(
33
+ label="Hub Dataset ID",
34
+ placeholder="Find your favorite dataset...",
35
+ search_type="dataset",
36
+ value="jamescalam/world-cities-geo",
37
+ )
38
+ query_input = gr.Textbox("", label="Ask anything...")
39
+
40
+ btn = gr.Button("Ask 🪄")
41
+ df = gr.DataFrame(datatype="markdown")
42
+
43
+ btn.click(
44
+ query_dataset,
45
+ inputs=[dataset_name, query_input],
46
+ outputs=[df],
47
+ )
48
+
49
+
50
+ if __name__ == "__main__":
51
+ demo.launch()
requirements.txt CHANGED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.3.0
3
+ annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ attrs==23.2.0
6
+ certifi==2024.6.2
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ contourpy==1.2.1
10
+ cycler==0.12.1
11
+ diskcache==5.6.3
12
+ dnspython==2.6.1
13
+ duckdb==1.0.0
14
+ email_validator==2.1.1
15
+ exceptiongroup==1.2.1
16
+ fastapi==0.111.0
17
+ fastapi-cli==0.0.4
18
+ ffmpy==0.3.2
19
+ filelock==3.14.0
20
+ fonttools==4.53.0
21
+ fsspec==2024.6.0
22
+ gradio==4.32.2
23
+ gradio_client==0.17.0
24
+ gradio_huggingfacehub_search==0.0.7
25
+ h11==0.14.0
26
+ httpcore==1.0.5
27
+ httptools==0.6.1
28
+ httpx==0.27.0
29
+ huggingface-hub==0.23.2
30
+ idna==3.7
31
+ importlib_resources==6.4.0
32
+ Jinja2==3.1.4
33
+ jsonschema==4.22.0
34
+ jsonschema-specifications==2023.12.1
35
+ kiwisolver==1.4.5
36
+ llama_cpp_python==0.2.77
37
+ markdown-it-py==3.0.0
38
+ MarkupSafe==2.1.5
39
+ matplotlib==3.9.0
40
+ mdurl==0.1.2
41
+ mpmath==1.3.0
42
+ networkx==3.3
43
+ numpy==1.26.4
44
+ orjson==3.10.3
45
+ packaging==24.0
46
+ pandas==2.2.2
47
+ pillow==10.3.0
48
+ psutil==5.9.8
49
+ pydantic==2.7.3
50
+ pydantic_core==2.18.4
51
+ pydub==0.25.1
52
+ Pygments==2.18.0
53
+ pyparsing==3.1.2
54
+ python-dateutil==2.9.0.post0
55
+ python-dotenv==1.0.1
56
+ python-multipart==0.0.9
57
+ pytz==2024.1
58
+ PyYAML==6.0.1
59
+ referencing==0.35.1
60
+ requests==2.32.3
61
+ rich==13.7.1
62
+ rpds-py==0.18.1
63
+ ruff==0.4.7
64
+ semantic-version==2.10.0
65
+ shellingham==1.5.4
66
+ six==1.16.0
67
+ sniffio==1.3.1
68
+ spaces==0.28.3
69
+ starlette==0.37.2
70
+ sympy==1.12.1
71
+ tomlkit==0.12.0
72
+ toolz==0.12.1
73
+ torch==2.3.0
74
+ tqdm==4.66.4
75
+ typer==0.12.3
76
+ typing_extensions==4.12.1
77
+ tzdata==2024.1
78
+ ujson==5.10.0
79
+ urllib3==2.2.1
80
+ uvicorn==0.30.1
81
+ uvloop==0.19.0
82
+ watchfiles==0.22.0
83
+ websockets==11.0.3