Spaces:

lint
/

gitio-query

Build error

App Files Files Community

lint commited on Jun 6, 2023

Commit

0822b7f

1 Parent(s): e8ac366

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

Dockerfile +23 -0
gsql_app.py +124 -0
query.py +3 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+FROM python:3.10
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+RUN python gsql_app.py

gsql_app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import duckdb
+import pandas as pd
+import gradio as gr
+from datasets import load_dataset
+import tempfile
+import re
+from query import sql_query
+max_rows = 20
+df_display_kwargs = dict(
+    wrap = True,
+    max_rows = max_rows,
+    type = "pandas",
+    row_count = 3,
+    col_count = 4,
+)
+dataset_choices = [
+    "rotten_tomatoes",
+    "sciq",
+]
+def apply_sql(input_table, sql_query):
+    # Use regex to extract the table name from the SQL query
+    match = re.search(r"\bFROM\s+(\w+)", sql_query, re.IGNORECASE)
+    if match:
+        table_name = match.group(1)
+    sql_query = sql_query.replace(table_name, "input_table")
+    output_df = duckdb.query(sql_query).to_df()
+    return output_df
+def display_dataset(dataset_id):
+    dataset = load_dataset(dataset_id, split="train")
+    df = dataset.to_pandas()
+    return df, df
+def upload_dataset(dataset_file):
+    if dataset_file is None:
+        return None, None
+    print(dataset_file.name)
+    df = pd.read_csv(dataset_file.name)
+    return df, df
+def process_dataset(full_dataset, sql_query):
+    input_table = full_dataset
+    output_df = duckdb.query(sql_query).to_df()
+    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+        file_path = temp_file.name
+        output_df.to_csv(file_path)
+    return output_df, file_path
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    neutral_hue="slate",
+)
+with gr.Blocks(analytics_enabled=False, theme=theme) as demo:
+    full_dataset = gr.State()
+    with gr.Column():
+        with gr.Row().style(equal_height=True):
+            with gr.Column(variant="panel"):
+                with gr.Row():
+                    dark_mode_btn = gr.Button("Dark Mode", variant="primary")
+                    load_dataset_button = gr.Button("Load HF Dataset", variant="secondary")
+                dataset_selector = gr.Dropdown(label="HF Dataset", choices=dataset_choices, value=dataset_choices[0])
+            with gr.Column(variant="compact"):
+                with gr.Row():
+                    sql_query_btn = gr.Button("Apply SQL Query", variant="secondary")
+                    download_dataset_btn = gr.Button("Download Queried Dataset", variant="primary")
+                sql_query_comp = gr.Code(language=None, label="SQL Query", lines=3, value=sql_query)
+        with gr.Row().style(equal_height=True):
+            upload_dataset_comp = gr.File(label="Upload Dataset")
+            download_dataset_comp = gr.File(label="Download Dataset")
+        with gr.Column(variant="panel"):
+            input_df_display = gr.Dataframe(**df_display_kwargs, label=f"Input Dataframe (Truncated to first {max_rows} Rows)")
+            output_df_display = gr.Dataframe(**df_display_kwargs, label=f"Output Dataframe (Truncated to first {max_rows} Rows)")
+    load_dataset_button.click(fn=display_dataset, inputs=[dataset_selector], outputs=[input_df_display, full_dataset])
+    upload_dataset_comp.change(fn=upload_dataset, inputs=[upload_dataset_comp], outputs=[input_df_display, full_dataset])
+    sql_query_btn.click(fn=apply_sql, inputs=[input_df_display, sql_query_comp], outputs=[output_df_display])
+    download_dataset_btn.click(fn=process_dataset, inputs=[full_dataset, sql_query_comp], outputs=[output_df_display, download_dataset_comp])
+    toggle_dark_mode_args = dict(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        _js="""() => {
+        if (document.querySelectorAll('.dark').length) {
+                document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark'));
+            } else {
+                document.querySelector('body').classList.add('dark');
+            }
+        }""",
+    )
+    demo.load(**toggle_dark_mode_args)
+    dark_mode_btn.click(**toggle_dark_mode_args)

query.py ADDED Viewed

	@@ -0,0 +1,3 @@

+sql_query='''
+SELECT * FROM input_table WHERE text LIKE '%the rock%'
+'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==3.33.1
+pandas>=2.0
+duckdb>=0.8.0
+datasets