from datasets import load_dataset | |
import duckdb | |
# Login using e.g. `huggingface-cli login` to access this dataset | |
ds = load_dataset("agents-course/unit4-students-scores") | |
# Convert the dataset to a Pandas DataFrame | |
df = ds["train"].to_pandas() | |
# Initialize DuckDB connection | |
con = duckdb.connect(":memory:") | |
# Register the DataFrame as a table | |
con.register("train", df) | |
# Example SQL query: Get average score by subject | |
query = """ | |
WITH FilteredTrains AS ( | |
SELECT * | |
FROM train | |
WHERE (code LIKE '%' || username || '%') or username = 'martinsu' | |
), | |
RankedTrains AS ( | |
SELECT | |
code, | |
username, | |
score, | |
RANK() OVER (ORDER BY score DESC) AS rank | |
FROM | |
FilteredTrains | |
) | |
SELECT | |
rank | |
FROM | |
RankedTrains | |
WHERE | |
username = 'martinsu'; | |
""" | |
result = con.execute(query).fetchdf() | |
print(result) | |