Spaces:
Running
Running
File size: 1,569 Bytes
c796372 dac1e43 c796372 dac1e43 c796372 38e9fcc c8dd83d b99ef2d c796372 9fc6ad5 c796372 de23bb0 9fc6ad5 de23bb0 dac1e43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import re

import duckdb
import gradio as gr
# Single in-memory DuckDB database, opened once when the module is loaded.
con = duckdb.connect(database=":memory:")
# Longest string kept in a result cell before it is cut and suffixed with "...".
_MAX_CELL_CHARS = 50

# LIMIT as a standalone keyword: a plain substring test would also accept
# identifiers such as "unlimited" or "rate_limit".
_LIMIT_KEYWORD = re.compile(r"\blimit\b", re.IGNORECASE)


def _shorten_cell(value):
    """Return *value* cut to ``_MAX_CELL_CHARS`` plus "..." if it is a longer string."""
    if isinstance(value, str) and len(value) > _MAX_CELL_CHARS:
        return value[:_MAX_CELL_CHARS] + "..."
    return value


def greet(SQL_Query):
    """Run a SQL query on the module-level DuckDB connection.

    Args:
        SQL_Query: SQL text to execute.

    Returns:
        The query result as a DataFrame in which every string cell longer
        than ``_MAX_CELL_CHARS`` characters is truncated and suffixed with
        "..."; non-string cells are returned unchanged.

    Raises:
        gr.Error: If the query is empty, contains no LIMIT keyword, or is
            rejected by DuckDB.
    """
    # Heuristic guard only: it checks that the keyword appears somewhere in
    # the text (possibly inside a string literal or comment), not that the
    # outermost statement is actually limited.
    if not SQL_Query or not _LIMIT_KEYWORD.search(SQL_Query):
        raise gr.Error("You should use the LIMIT clause or it may take too much time to run your query. For example: ```LIMIT 10000```")

    try:
        frame = con.sql(SQL_Query).df()
    except duckdb.Error as err:
        # Surface the database message instead of an opaque failure.
        raise gr.Error(str(err)) from err

    # Column-wise apply, then element-wise, so each cell is inspected once.
    return frame.apply(lambda column: column.apply(_shorten_cell))
# Single-quoted hf:// glob over the parquet files under sample/10BT of the
# HuggingFaceFW/fineweb dataset; every example query reads from it.
_FINEWEB_SAMPLE = "'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet'"

# Subquery restricted to 10000 rows, used as the input of the GROUP BY examples.
_CAPPED_SAMPLE = f"(SELECT * FROM {_FINEWEB_SAMPLE} LIMIT 10000)"

# Example queries offered in the UI.
examples = [
    f"SELECT * FROM {_FINEWEB_SAMPLE} LIMIT 10;",
    f"SELECT text, language_score FROM {_FINEWEB_SAMPLE} WHERE language_score > 0.97 LIMIT 10;",
    f"SELECT text, language_score FROM {_FINEWEB_SAMPLE} WHERE language_score < 0.67 LIMIT 10;",
    "SELECT dump, min(language_score), avg(language_score), max(language_score) FROM\n"
    + _CAPPED_SAMPLE
    + "\nGROUP BY dump;",
    # Plain concatenation here: the regex contains braces, which an f-string
    # would try to interpret.
    "SELECT text, language_score FROM "
    + _FINEWEB_SAMPLE
    + " WHERE text SIMILAR TO '([A-Z ]){4,}.*' LIMIT 10;",
    "SELECT dump, min(token_count), avg(token_count), max(token_count) FROM\n"
    + _CAPPED_SAMPLE
    + "\nGROUP BY dump;",
]
description = "Run SQL queries on the HuggingFaceFW/fineweb dataset"

# NOTE(review): "#component-4" is a numbered element id; which widget it
# refers to cannot be told from this file — confirm after any layout change.
css = "#component-4{display: block;}"

# Text input in, dataframe out, with the example queries listed for the user.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="dataframe",
    examples=examples,
    cache_examples=False,
    description=description,
    css=css,
)

# Start the app as soon as the module is executed.
demo.launch()
|