File size: 858 Bytes
61feb08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from datasets import load_dataset
import duckdb
# Look up one user's position on the unit-4 student leaderboard.
#
# Login using e.g. `huggingface-cli login` to access this dataset.
ds = load_dataset("agents-course/unit4-students-scores")

# Convert the "train" split to a pandas DataFrame so DuckDB can query it.
df = ds["train"].to_pandas()

# Username whose rank is reported. Defined once and bound into the query as a
# parameter rather than being repeated as an inline SQL literal.
target_username = "martinsu"

# SQL query: rank of the target user by score (highest score = rank 1).
#
# FilteredTrains keeps only rows whose `code` value contains the row's own
# `username`, plus every row of the target user regardless of that check.
# NOTE(review): presumably `code` is a link to the student's code and the
# containment test filters out entries that do not point at the submitter's
# own work -- confirm against the dataset card.
#
# `contains()` performs a literal substring test. The previous
# `LIKE '%' || username || '%'` form interpreted `_` and `%` inside a
# username as wildcards, so e.g. "a_b" also matched "axb".
#
# RANK() assigns equal ranks to tied scores and leaves gaps after ties.
query = """
WITH FilteredTrains AS (
    SELECT *
    FROM train
    WHERE contains(code, username) OR username = ?
),
RankedTrains AS (
    SELECT
        code,
        username,
        score,
        RANK() OVER (ORDER BY score DESC) AS rank
    FROM FilteredTrains
)
SELECT rank
FROM RankedTrains
WHERE username = ?
"""

# Use the connection as a context manager so the in-memory database is closed
# once the result has been materialised as a DataFrame.
with duckdb.connect(":memory:") as con:
    # Expose the DataFrame to SQL under the table name "train".
    con.register("train", df)
    # One bound value per `?` placeholder, in order of appearance.
    result = con.execute(query, [target_username, target_username]).fetchdf()

print(result)
|