Spaces:
Sleeping
Sleeping
| # Adaptive SQL Trainer β Domain Randomized with OpenAI (Gradio + SQLite) | |
| # - OpenAI randomizes a domain and questions (fallback dataset if unavailable). | |
| # - 3β4 related tables with seed rows installed in SQLite. | |
| # - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO. | |
| # - Validator enforces columns only when the prompt asks; otherwise focuses on rows. | |
| # - ERD shows tables only (no edges) and bolds the tables you reference while typing. | |
| # - Domain picker now round-robins across: bookstore, retail sales, wholesaler, sales tax, oil & gas wells, marketing. | |
| import os | |
| import re | |
| import json | |
| import time | |
| import random | |
| import sqlite3 | |
| import threading | |
| from datetime import datetime, timezone | |
| from typing import List, Dict, Any, Tuple, Optional, Set | |
| import gradio as gr | |
| import pandas as pd | |
| # -------------------- OpenAI (optional) -------------------- | |
| OPENAI_AVAILABLE = True | |
| DEFAULT_MODEL = os.getenv("OPENAI_MODEL") # optional override | |
| try: | |
| from openai import OpenAI | |
| _client = OpenAI() # requires OPENAI_API_KEY | |
| except Exception: | |
| OPENAI_AVAILABLE = False | |
| _client = None | |
| def _candidate_models(): | |
| base = [ | |
| (DEFAULT_MODEL or "").strip() or None, | |
| "gpt-4o-mini", | |
| "gpt-4o", | |
| "gpt-4.1-mini", | |
| ] | |
| seen = set() | |
| return [m for m in base if m and (m not in seen and not seen.add(m))] | |
| # -------------------- ERD drawing (headless) -------------------- | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| from matplotlib.patches import Rectangle | |
| from io import BytesIO | |
| from PIL import Image | |
| PLOT_FIGSIZE = (7.6, 3.8) | |
| PLOT_DPI = 120 | |
| PLOT_HEIGHT = 300 | |
| def _fig_to_pil(fig) -> Image.Image: | |
| buf = BytesIO() | |
| fig.tight_layout() | |
| fig.savefig(buf, format="png", dpi=PLOT_DPI, bbox_inches="tight") | |
| plt.close(fig) | |
| buf.seek(0) | |
| return Image.open(buf) | |
| def draw_dynamic_erd( | |
| schema: Dict[str, Any], | |
| highlight_tables: Optional[Set[str]] = None, | |
| highlight_edges: Optional[Set[Tuple[str, str]]] = None, # kept for API compatibility; ignored | |
| ) -> Image.Image: | |
| """ | |
| Draw tables (no FK edges). If highlight_tables provided, those table borders are bolded. | |
| """ | |
| highlight_tables = set(highlight_tables or []) | |
| tables = schema.get("tables", []) | |
| fig, ax = plt.subplots(figsize=PLOT_FIGSIZE); ax.axis("off") | |
| if not tables: | |
| ax.text(0.5, 0.5, "No tables to diagram.", ha="center", va="center") | |
| return _fig_to_pil(fig) | |
| # Layout tables horizontally | |
| n = len(tables) | |
| margin = 0.03 | |
| width = (1 - margin * (n + 1)) / max(n, 1) | |
| height = 0.70 | |
| y = 0.20 | |
| # Draw table boxes + columns (no edges) | |
| for i, t in enumerate(tables): | |
| tx = margin + i * (width + margin) | |
| # Border weight highlights usage | |
| lw = 2.0 if t["name"] in highlight_tables else 1.2 | |
| ax.add_patch(Rectangle((tx, y), width, height, fill=False, lw=lw)) | |
| ax.text(tx + 0.01, y + height - 0.04, t["name"], fontsize=10, ha="left", va="top", weight="bold") | |
| yy = y + height - 0.09 | |
| pkset = set(t.get("pk", []) or []) | |
| fk_map: Dict[str, List[Tuple[str, str]]] = {} | |
| for fk in t.get("fks", []) or []: | |
| ref_tbl = fk.get("ref_table", "") | |
| for c, rc in zip(fk.get("columns", []) or [], fk.get("ref_columns", []) or []): | |
| fk_map.setdefault(c, []).append((ref_tbl, rc)) | |
| for col in t.get("columns", []): | |
| nm = col.get("name", "") | |
| tag = "" | |
| if nm in pkset: | |
| tag = " (PK)" | |
| if nm in fk_map: | |
| ref = fk_map[nm][0] | |
| tag = f" (FKβ{ref[0]}.{ref[1]})" if not tag else tag.replace(")", f", FKβ{ref[0]}.{ref[1]})") | |
| ax.text(tx + 0.016, yy, f"{nm}{tag}", fontsize=9, ha="left", va="top") | |
| yy -= 0.055 | |
| ax.text(0.5, 0.06, f"Domain: {schema.get('domain','unknown')}", fontsize=9, ha="center") | |
| return _fig_to_pil(fig) | |
| # Parse JOINs from SQL to highlight tables (edges ignored now) | |
| JOIN_TBL_RE = re.compile(r"\b(?:from|join)\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?", re.IGNORECASE) | |
| EQ_ON_RE = re.compile(r"([a-z_]\w*)\.[a-z_]\w*\s*=\s*([a-z_]\w*)\.[a-z_]\w*", re.IGNORECASE) | |
| USING_RE = re.compile(r"\bjoin\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?\s+using\s*\(", re.IGNORECASE) | |
| def sql_highlights(sql: str, schema: Dict[str, Any]) -> Tuple[Set[str], Set[Tuple[str, str]]]: | |
| if not sql: | |
| return set(), set() | |
| low = " ".join(sql.strip().split()) | |
| alias_to_table: Dict[str, str] = {} | |
| join_order: List[str] = [] | |
| for m in JOIN_TBL_RE.finditer(low): | |
| table = m.group(1) | |
| alias = m.group(2) or table | |
| alias_to_table[alias] = table | |
| join_order.append(alias) | |
| # Edges are computed but not used by the renderer anymore | |
| edges: Set[Tuple[str, str]] = set() | |
| for a1, a2 in EQ_ON_RE.findall(low): | |
| t1 = alias_to_table.get(a1, a1) | |
| t2 = alias_to_table.get(a2, a2) | |
| if t1 != t2: | |
| edges.add((t1, t2)) | |
| if USING_RE.search(low) and len(join_order) >= 2: | |
| for i in range(1, len(join_order)): | |
| t_left = alias_to_table.get(join_order[i-1], join_order[i-1]) | |
| t_right = alias_to_table.get(join_order[i], join_order[i]) | |
| if t_left != t_right: | |
| edges.add((t_left, t_right)) | |
| used_tables = {alias_to_table.get(a, a) for a in join_order} | |
| schema_tables = {t["name"] for t in schema.get("tables", [])} | |
| edges = {(a, b) for (a, b) in edges if a in schema_tables and b in schema_tables} | |
| used_tables = {t for t in used_tables if t in schema_tables} | |
| return used_tables, edges | |
| # -------------------- SQL keyword highlighting (preview) -------------------- | |
| def _html_escape(s: str) -> str: | |
| return s.replace("&", "&").replace("<", "<").replace(">", ">") | |
| # Multi-word phrases first (to avoid double-highlighting) | |
| PHRASE_CLASSES = [ | |
| (r"\bgroup\s+by\b", "kw-group"), | |
| (r"\border\s+by\b", "kw-order"), | |
| (r"\bleft\s+join\b", "kw-join"), | |
| (r"\binner\s+join\b", "kw-join"), | |
| (r"\bright\s+join\b", "kw-join"), | |
| (r"\bfull\s+(?:outer\s+)?join\b", "kw-join"), | |
| (r"\bcreate\s+view\b", "kw-ddl"), | |
| (r"\bcreate\s+table\b", "kw-ddl"), | |
| (r"\binsert\s+into\b", "kw-ddl"), | |
| (r"\bselect\s+into\b", "kw-ddl"), | |
| (r"\bprimary\s+key\b", "kw-ddl"), | |
| (r"\bforeign\s+key\b", "kw-ddl"), | |
| ] | |
| TOKEN_CLASSES = [ | |
| (r"\bselect\b", "kw-core"), | |
| (r"\bfrom\b", "kw-core"), | |
| (r"\bwhere\b", "kw-core"), | |
| (r"\bjoin\b", "kw-join"), | |
| (r"\bon\b", "kw-join"), | |
| (r"\busing\b", "kw-join"), | |
| (r"\bhaving\b", "kw-group"), | |
| (r"\blimit\b", "kw-order"), | |
| (r"\boffset\b", "kw-order"), | |
| (r"\bdistinct\b", "kw-core"), | |
| (r"\bcase\b", "kw-core"), | |
| (r"\bwhen\b", "kw-core"), | |
| (r"\bthen\b", "kw-core"), | |
| (r"\belse\b", "kw-core"), | |
| (r"\bend\b", "kw-core"), | |
| (r"\band\b", "kw-core"), | |
| (r"\bor\b", "kw-core"), | |
| (r"\bnot\b", "kw-core"), | |
| (r"\bin\b", "kw-core"), | |
| (r"\bis\b", "kw-core"), | |
| (r"\bnull\b", "kw-core"), | |
| (r"\blike\b", "kw-core"), | |
| (r"\bbetween\b", "kw-core"), | |
| (r"\bunion\b", "kw-core"), | |
| (r"\ball\b", "kw-core"), | |
| (r"\bupdate\b", "kw-ddl"), | |
| (r"\bdelete\b", "kw-ddl"), | |
| (r"\bview\b", "kw-ddl"), | |
| (r"\btable\b", "kw-ddl"), | |
| (r"\binto\b", "kw-ddl"), | |
| (r"\bvalues\b", "kw-ddl"), | |
| (r"\bcreate\b", "kw-ddl"), | |
| ] | |
| def highlight_sql_html(sql: str) -> str: | |
| if not sql: | |
| return "" | |
| txt = _html_escape(sql) | |
| # Phrases first | |
| for pat, cls in PHRASE_CLASSES: | |
| txt = re.sub(pat, lambda m: f'<span class="kw {cls}">{m.group(0).upper()}</span>', txt, flags=re.IGNORECASE) | |
| # Single tokens | |
| for pat, cls in TOKEN_CLASSES: | |
| txt = re.sub(pat, lambda m: f'<span class="kw {cls}">{m.group(0).upper()}</span>', txt, flags=re.IGNORECASE) | |
| return f'<div class="sql-preview"><pre><code>{txt}</code></pre></div>' | |
| # -------------------- SQLite + locking -------------------- | |
| DB_DIR = "/data" if os.path.exists("/data") else "." | |
| DB_PATH = os.path.join(DB_DIR, "sql_trainer_dynamic.db") | |
| EXPORT_DIR = "." | |
| RANDOM_SEED = int(os.getenv("RANDOM_SEED", "7")) | |
| random.seed(RANDOM_SEED) | |
| SYS_RAND = random.SystemRandom() | |
| DB_LOCK = threading.RLock() | |
| def connect_db(): | |
| con = sqlite3.connect(DB_PATH, check_same_thread=False) | |
| con.execute("PRAGMA journal_mode=WAL;") | |
| con.execute("PRAGMA synchronous=NORMAL;") | |
| con.execute("PRAGMA foreign_keys = ON;") | |
| return con | |
| CONN = connect_db() | |
| def init_progress_tables(con: sqlite3.Connection): | |
| with DB_LOCK: | |
| cur = con.cursor() | |
| cur.execute(""" | |
| CREATE TABLE IF NOT EXISTS users ( | |
| user_id TEXT PRIMARY KEY, | |
| name TEXT, | |
| created_at TEXT | |
| ) | |
| """) | |
| cur.execute(""" | |
| CREATE TABLE IF NOT EXISTS attempts ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| user_id TEXT, | |
| question_id TEXT, | |
| category TEXT, | |
| correct INTEGER, | |
| sql_text TEXT, | |
| timestamp TEXT, | |
| time_taken REAL, | |
| difficulty INTEGER, | |
| source TEXT, | |
| notes TEXT | |
| ) | |
| """) | |
| cur.execute(""" | |
| CREATE TABLE IF NOT EXISTS session_meta ( | |
| id INTEGER PRIMARY KEY CHECK (id=1), | |
| domain TEXT, | |
| schema_json TEXT | |
| ) | |
| """) | |
| con.commit() | |
| init_progress_tables(CONN) | |
| # -------------------- Built-in fallback domain packs -------------------- | |
| # (packs identical to your last version; omitted comments for brevity) | |
| FALLBACK_PACKS: Dict[str, Dict[str, Any]] = {} | |
| # --- Bookstore (existing) --- | |
| FALLBACK_PACKS["bookstore"] = { | |
| "schema": { | |
| "domain": "bookstore", | |
| "tables": [ | |
| { | |
| "name": "authors", | |
| "pk": ["author_id"], | |
| "columns": [ | |
| {"name":"author_id","type":"INTEGER"}, | |
| {"name":"name","type":"TEXT"}, | |
| {"name":"country","type":"TEXT"}, | |
| {"name":"birth_year","type":"INTEGER"}, | |
| ], | |
| "fks": [], | |
| "rows": [ | |
| {"author_id":1,"name":"Isaac Asimov","country":"USA","birth_year":1920}, | |
| {"author_id":2,"name":"Ursula K. Le Guin","country":"USA","birth_year":1929}, | |
| {"author_id":3,"name":"Haruki Murakami","country":"Japan","birth_year":1949}, | |
| {"author_id":4,"name":"Chinua Achebe","country":"Nigeria","birth_year":1930}, | |
| {"author_id":5,"name":"Jane Austen","country":"UK","birth_year":1775}, | |
| {"author_id":6,"name":"J.K. Rowling","country":"UK","birth_year":1965}, | |
| {"author_id":7,"name":"Yuval Noah Harari","country":"Israel","birth_year":1976}, | |
| {"author_id":8,"name":"New Author","country":"Nowhere","birth_year":1990}, | |
| ], | |
| }, | |
| { | |
| "name": "bookstores", | |
| "pk": ["store_id"], | |
| "columns": [ | |
| {"name":"store_id","type":"INTEGER"}, | |
| {"name":"name","type":"TEXT"}, | |
| {"name":"city","type":"TEXT"}, | |
| {"name":"state","type":"TEXT"}, | |
| ], | |
| "fks": [], | |
| "rows": [ | |
| {"store_id":1,"name":"Downtown Books","city":"Oklahoma City","state":"OK"}, | |
| {"store_id":2,"name":"Harbor Books","city":"Seattle","state":"WA"}, | |
| {"store_id":3,"name":"Desert Pages","city":"Phoenix","state":"AZ"}, | |
| ], | |
| }, | |
| { | |
| "name": "books", | |
| "pk": ["book_id"], | |
| "columns": [ | |
| {"name":"book_id","type":"INTEGER"}, | |
| {"name":"title","type":"TEXT"}, | |
| {"name":"author_id","type":"INTEGER"}, | |
| {"name":"store_id","type":"INTEGER"}, | |
| {"name":"category","type":"TEXT"}, | |
| {"name":"price","type":"REAL"}, | |
| {"name":"published_year","type":"INTEGER"}, | |
| ], | |
| "fks": [ | |
| {"columns":["author_id"],"ref_table":"authors","ref_columns":["author_id"]}, | |
| {"columns":["store_id"],"ref_table":"bookstores","ref_columns":["store_id"]}, | |
| ], | |
| "rows": [ | |
| {"book_id":101,"title":"Foundation","author_id":1,"store_id":1,"category":"Sci-Fi","price":14.99,"published_year":1951}, | |
| {"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950}, | |
| {"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969}, | |
| {"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968}, | |
| {"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00}, | |
| {"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00}, | |
| {"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00}, | |
| {"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00}, | |
| {"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00}, | |
| {"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00}, | |
| {"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00}, | |
| {"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00}, | |
| ], | |
| }, | |
| ] | |
| }, | |
| "questions": [ | |
| {"id":"B_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"Select all rows and columns from `authors`.", | |
| "answer_sql":["SELECT * FROM authors;"]}, | |
| {"id":"B_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"Show `title` and `price` from `books`.", | |
| "answer_sql":["SELECT title, price FROM books;"]}, | |
| {"id":"B_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"List SciβFi books under $15 (show title, price).", | |
| "answer_sql":["SELECT title, price FROM books WHERE category='Sci-Fi' AND price < 15;"]}, | |
| {"id":"B_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Using aliases `b` and `a`, join `books` to `authors` and show `b.title` and `a.name` as `author_name`.", | |
| "answer_sql":["SELECT b.title, a.name AS author_name FROM books b JOIN authors a ON b.author_id=a.author_id;"], | |
| "requires_aliases":True,"required_aliases":["a","b"]}, | |
| {"id":"B_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Inner join `books` and `bookstores`. Return `title`, `name` as `store`.", | |
| "answer_sql":["SELECT b.title, s.name AS store FROM books b INNER JOIN bookstores s ON b.store_id=s.store_id;"]}, | |
| {"id":"B_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"List each author and their number of books (include authors with zero): columns `name`, `book_count`.", | |
| "answer_sql":["SELECT a.name, COUNT(b.book_id) AS book_count FROM authors a LEFT JOIN books b ON a.author_id=b.author_id GROUP BY a.name;"]}, | |
| {"id":"B_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create a view `vw_pricy` with `title`, `price` for books priced > 25.", | |
| "answer_sql":["CREATE VIEW vw_pricy AS SELECT title, price FROM books WHERE price > 25;"]}, | |
| {"id":"B_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create a table `cheap_books` containing books priced < 12. Use CTAS or SELECT INTO.", | |
| "answer_sql":[ | |
| "CREATE TABLE cheap_books AS SELECT * FROM books WHERE price < 12;", | |
| "SELECT * INTO cheap_books FROM books WHERE price < 12;" | |
| ]}, | |
| ] | |
| } | |
| # --- Retail sales --- | |
| FALLBACK_PACKS["retail sales"] = { | |
| "schema": { | |
| "domain": "retail sales", | |
| "tables": [ | |
| {"name":"customers","pk":["customer_id"], | |
| "columns":[ | |
| {"name":"customer_id","type":"INTEGER"}, | |
| {"name":"name","type":"TEXT"}, | |
| {"name":"city","type":"TEXT"}, | |
| {"name":"state","type":"TEXT"}], | |
| "fks":[], | |
| "rows":[ | |
| {"customer_id":1,"name":"Ava Reed","city":"Seattle","state":"WA"}, | |
| {"customer_id":2,"name":"Mason Ortiz","city":"Portland","state":"OR"}, | |
| {"customer_id":3,"name":"Noah Patel","city":"Phoenix","state":"AZ"}, | |
| {"customer_id":4,"name":"Emma Kim","city":"San Diego","state":"CA"}, | |
| {"customer_id":5,"name":"Olivia Park","city":"Dallas","state":"TX"}, | |
| {"customer_id":6,"name":"Liam Gray","city":"Denver","state":"CO"}, | |
| {"customer_id":7,"name":"Sophia Lee","city":"Boston","state":"MA"}, | |
| {"customer_id":8,"name":"Elijah Hall","city":"Miami","state":"FL"}]}, | |
| {"name":"products","pk":["product_id"], | |
| "columns":[ | |
| {"name":"product_id","type":"INTEGER"}, | |
| {"name":"product_name","type":"TEXT"}, | |
| {"name":"category","type":"TEXT"}, | |
| {"name":"price","type":"REAL"}], | |
| "fks":[], | |
| "rows":[ | |
| {"product_id":101,"product_name":"Coffee Maker","category":"Home","price":49.99}, | |
| {"product_id":102,"product_name":"Electric Kettle","category":"Home","price":29.99}, | |
| {"product_id":103,"product_name":"Headphones","category":"Electronics","price":79.00}, | |
| {"product_id":104,"product_name":"USB-C Cable","category":"Electronics","price":9.99}, | |
| {"product_id":105,"product_name":"Notebook","category":"Stationery","price":3.49}, | |
| {"product_id":106,"product_name":"Desk Lamp","category":"Home","price":19.99}, | |
| {"product_id":107,"product_name":"T-Shirt","category":"Clothing","price":15.00}, | |
| {"product_id":108,"product_name":"Sneakers","category":"Clothing","price":65.00}]}, | |
| {"name":"orders","pk":["order_id"], | |
| "columns":[ | |
| {"name":"order_id","type":"INTEGER"}, | |
| {"name":"customer_id","type":"INTEGER"}, | |
| {"name":"order_date","type":"TEXT"}], | |
| "fks":[{"columns":["customer_id"],"ref_table":"customers","ref_columns":["customer_id"]}], | |
| "rows":[ | |
| {"order_id":201,"customer_id":1,"order_date":"2024-01-05"}, | |
| {"order_id":202,"customer_id":2,"order_date":"2024-01-07"}, | |
| {"order_id":203,"customer_id":1,"order_date":"2024-01-12"}, | |
| {"order_id":204,"customer_id":3,"order_date":"2024-02-01"}, | |
| {"order_id":205,"customer_id":4,"order_date":"2024-02-10"}, | |
| {"order_id":206,"customer_id":5,"order_date":"2024-03-02"}, | |
| {"order_id":207,"customer_id":6,"order_date":"2024-03-03"}, | |
| {"order_id":208,"customer_id":7,"order_date":"2024-03-09"}, | |
| {"order_id":209,"customer_id":8,"order_date":"2024-03-15"}, | |
| {"order_id":210,"customer_id":3,"order_date":"2024-03-20"}]}, | |
| {"name":"order_items","pk":["order_id","product_id"], | |
| "columns":[ | |
| {"name":"order_id","type":"INTEGER"}, | |
| {"name":"product_id","type":"INTEGER"}, | |
| {"name":"qty","type":"INTEGER"}, | |
| {"name":"unit_price","type":"REAL"}], | |
| "fks":[ | |
| {"columns":["order_id"],"ref_table":"orders","ref_columns":["order_id"]}, | |
| {"columns":["product_id"],"ref_table":"products","ref_columns":["product_id"]}], | |
| "rows":[ | |
| {"order_id":201,"product_id":101,"qty":1,"unit_price":49.99}, | |
| {"order_id":201,"product_id":104,"qty":2,"unit_price":9.99}, | |
| {"order_id":202,"product_id":107,"qty":3,"unit_price":15.00}, | |
| {"order_id":203,"product_id":103,"qty":1,"unit_price":79.00}, | |
| {"order_id":203,"product_id":105,"qty":5,"unit_price":3.49}, | |
| {"order_id":204,"product_id":102,"qty":2,"unit_price":29.99}, | |
| {"order_id":205,"product_id":108,"qty":1,"unit_price":65.00}, | |
| {"order_id":206,"product_id":106,"qty":2,"unit_price":19.99}, | |
| {"order_id":207,"product_id":104,"qty":4,"unit_price":9.99}, | |
| {"order_id":208,"product_id":101,"qty":1,"unit_price":49.99}, | |
| {"order_id":209,"product_id":107,"qty":2,"unit_price":15.00}, | |
| {"order_id":210,"product_id":103,"qty":1,"unit_price":79.00}]}] }, | |
| "questions":[ | |
| {"id":"RS_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"Show everything from `customers`.", | |
| "answer_sql":["SELECT * FROM customers;"]}, | |
| {"id":"RS_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"List product name and price from `products`.", | |
| "answer_sql":["SELECT product_name, price FROM products;"]}, | |
| {"id":"RS_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"Orders placed in March 2024 (return `order_id`, `order_date`).", | |
| "answer_sql":["SELECT order_id, order_date FROM orders WHERE order_date BETWEEN '2024-03-01' AND '2024-03-31';"]}, | |
| {"id":"RS_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Join `orders` (alias `o`) with `customers` (alias `c`) and show `o.order_id`, `c.name`.", | |
| "answer_sql":["SELECT o.order_id, c.name FROM orders o JOIN customers c ON o.customer_id=c.customer_id;"], | |
| "requires_aliases":True,"required_aliases":["o","c"]}, | |
| {"id":"RS_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Inner join `order_items` with `products` to show items where qty β₯ 3. Return `product_name`, `qty`.", | |
| "answer_sql":["SELECT p.product_name, oi.qty FROM order_items oi INNER JOIN products p ON oi.product_id=p.product_id WHERE oi.qty >= 3;"]}, | |
| {"id":"RS_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"Customers and their number of orders (include zero). Columns: `name`, `order_count`.", | |
| "answer_sql":["SELECT c.name, COUNT(o.order_id) AS order_count FROM customers c LEFT JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.name;"]}, | |
| {"id":"RS_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create view `vw_top_qty` with total quantity by product: `product_id`, `total_qty`.", | |
| "answer_sql":["CREATE VIEW vw_top_qty AS SELECT product_id, SUM(qty) AS total_qty FROM order_items GROUP BY product_id;"]}, | |
| {"id":"RS_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create table `cheap_products` with products priced < 10.", | |
| "answer_sql":[ | |
| "CREATE TABLE cheap_products AS SELECT * FROM products WHERE price < 10;", | |
| "SELECT * INTO cheap_products FROM products WHERE price < 10;" | |
| ]} | |
| ] | |
| } | |
| # --- Wholesaler --- | |
| FALLBACK_PACKS["wholesaler"] = { | |
| "schema":{ | |
| "domain":"wholesaler", | |
| "tables":[ | |
| {"name":"suppliers","pk":["supplier_id"], | |
| "columns":[ | |
| {"name":"supplier_id","type":"INTEGER"}, | |
| {"name":"supplier_name","type":"TEXT"}, | |
| {"name":"country","type":"TEXT"}], | |
| "fks":[], | |
| "rows":[ | |
| {"supplier_id":1,"supplier_name":"Nordic Foods","country":"SE"}, | |
| {"supplier_id":2,"supplier_name":"Metro Trade","country":"DE"}, | |
| {"supplier_id":3,"supplier_name":"Pacific Imports","country":"US"}, | |
| {"supplier_id":4,"supplier_name":"Andes Supply","country":"CL"}, | |
| {"supplier_id":5,"supplier_name":"Sahara Wholesale","country":"MA"}]}, | |
| {"name":"items","pk":["item_id"], | |
| "columns":[ | |
| {"name":"item_id","type":"INTEGER"}, | |
| {"name":"item_name","type":"TEXT"}, | |
| {"name":"unit_cost","type":"REAL"}], | |
| "fks":[], | |
| "rows":[ | |
| {"item_id":101,"item_name":"Olive Oil 1L","unit_cost":4.20}, | |
| {"item_id":102,"item_name":"Canned Tuna","unit_cost":1.10}, | |
| {"item_id":103,"item_name":"Basmati Rice 5kg","unit_cost":6.30}, | |
| {"item_id":104,"item_name":"Black Tea 200g","unit_cost":2.70}, | |
| {"item_id":105,"item_name":"Peanut Butter","unit_cost":3.00}, | |
| {"item_id":106,"item_name":"Tomato Paste","unit_cost":0.95}, | |
| {"item_id":107,"item_name":"Chickpeas 1kg","unit_cost":1.60}, | |
| {"item_id":108,"item_name":"Soy Sauce 500ml","unit_cost":2.10}]}, | |
| {"name":"purchase_orders","pk":["po_id"], | |
| "columns":[ | |
| {"name":"po_id","type":"INTEGER"}, | |
| {"name":"supplier_id","type":"INTEGER"}, | |
| {"name":"po_date","type":"TEXT"}], | |
| "fks":[{"columns":["supplier_id"],"ref_table":"suppliers","ref_columns":["supplier_id"]}], | |
| "rows":[ | |
| {"po_id":201,"supplier_id":1,"po_date":"2024-01-10"}, | |
| {"po_id":202,"supplier_id":2,"po_date":"2024-01-18"}, | |
| {"po_id":203,"supplier_id":3,"po_date":"2024-02-05"}, | |
| {"po_id":204,"supplier_id":1,"po_date":"2024-02-22"}, | |
| {"po_id":205,"supplier_id":5,"po_date":"2024-03-01"}, | |
| {"po_id":206,"supplier_id":4,"po_date":"2024-03-07"}]}, | |
| {"name":"po_lines","pk":["po_id","item_id"], | |
| "columns":[ | |
| {"name":"po_id","type":"INTEGER"}, | |
| {"name":"item_id","type":"INTEGER"}, | |
| {"name":"qty","type":"INTEGER"}, | |
| {"name":"line_cost","type":"REAL"}], | |
| "fks":[ | |
| {"columns":["po_id"],"ref_table":"purchase_orders","ref_columns":["po_id"]}, | |
| {"columns":["item_id"],"ref_table":"items","ref_columns":["item_id"]}], | |
| "rows":[ | |
| {"po_id":201,"item_id":101,"qty":200,"line_cost":840.0}, | |
| {"po_id":201,"item_id":106,"qty":500,"line_cost":475.0}, | |
| {"po_id":202,"item_id":103,"qty":120,"line_cost":756.0}, | |
| {"po_id":203,"item_id":102,"qty":600,"line_cost":660.0}, | |
| {"po_id":203,"item_id":104,"qty":150,"line_cost":405.0}, | |
| {"po_id":204,"item_id":105,"qty":180,"line_cost":540.0}, | |
| {"po_id":205,"item_id":107,"qty":300,"line_cost":480.0}, | |
| {"po_id":206,"item_id":108,"qty":250,"line_cost":525.0}]}] | |
| }, | |
| "questions":[ | |
| {"id":"W_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"Show all suppliers.", | |
| "answer_sql":["SELECT * FROM suppliers;"]}, | |
| {"id":"W_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"Return `item_name` and `unit_cost` from `items`.", | |
| "answer_sql":["SELECT item_name, unit_cost FROM items;"]}, | |
| {"id":"W_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"Items costing more than 3.00 (show name, cost).", | |
| "answer_sql":["SELECT item_name, unit_cost FROM items WHERE unit_cost > 3.00;"]}, | |
| {"id":"W_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Using aliases `p` and `s`, show each `po_id` with `supplier_name`.", | |
| "answer_sql":["SELECT p.po_id, s.supplier_name FROM purchase_orders p JOIN suppliers s ON p.supplier_id=s.supplier_id;"], | |
| "requires_aliases":True,"required_aliases":["p","s"]}, | |
| {"id":"W_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Inner join `po_lines` and `items`; list `item_name`, total qty per item.", | |
| "answer_sql":["SELECT i.item_name, SUM(l.qty) AS total_qty FROM po_lines l INNER JOIN items i ON l.item_id=i.item_id GROUP BY i.item_name;"]}, | |
| {"id":"W_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"Suppliers and count of POs (include those with zero). Columns `supplier_name`, `po_count`.", | |
| "answer_sql":["SELECT s.supplier_name, COUNT(p.po_id) AS po_count FROM suppliers s LEFT JOIN purchase_orders p ON s.supplier_id=p.supplier_id GROUP BY s.supplier_name;"]}, | |
| {"id":"W_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create view `vw_po_value` with `po_id` and total `line_cost` per PO.", | |
| "answer_sql":["CREATE VIEW vw_po_value AS SELECT po_id, SUM(line_cost) AS po_value FROM po_lines GROUP BY po_id;"]}, | |
| {"id":"W_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create table `budget_items` where unit_cost < 2.00.", | |
| "answer_sql":[ | |
| "CREATE TABLE budget_items AS SELECT * FROM items WHERE unit_cost < 2.00;", | |
| "SELECT * INTO budget_items FROM items WHERE unit_cost < 2.00;" | |
| ]} | |
| ] | |
| } | |
| # --- Sales tax --- | |
| FALLBACK_PACKS["sales tax"] = { | |
| "schema":{ | |
| "domain":"sales tax", | |
| "tables":[ | |
| {"name":"jurisdictions","pk":["jurisdiction_id"], | |
| "columns":[ | |
| {"name":"jurisdiction_id","type":"INTEGER"}, | |
| {"name":"name","type":"TEXT"}, | |
| {"name":"state","type":"TEXT"}], | |
| "fks":[], | |
| "rows":[ | |
| {"jurisdiction_id":1,"name":"King County","state":"WA"}, | |
| {"jurisdiction_id":2,"name":"Multnomah","state":"OR"}, | |
| {"jurisdiction_id":3,"name":"Maricopa","state":"AZ"}, | |
| {"jurisdiction_id":4,"name":"Travis","state":"TX"}, | |
| {"jurisdiction_id":5,"name":"Denver","state":"CO"}, | |
| {"jurisdiction_id":6,"name":"Miami-Dade","state":"FL"}]}, | |
| {"name":"tax_rates","pk":["rate_id"], | |
| "columns":[ | |
| {"name":"rate_id","type":"INTEGER"}, | |
| {"name":"jurisdiction_id","type":"INTEGER"}, | |
| {"name":"category","type":"TEXT"}, | |
| {"name":"rate","type":"REAL"}], | |
| "fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}], | |
| "rows":[ | |
| {"rate_id":101,"jurisdiction_id":1,"category":"general","rate":0.102}, | |
| {"rate_id":102,"jurisdiction_id":2,"category":"general","rate":0.000}, | |
| {"rate_id":103,"jurisdiction_id":3,"category":"general","rate":0.056}, | |
| {"rate_id":104,"jurisdiction_id":4,"category":"general","rate":0.0825}, | |
| {"rate_id":105,"jurisdiction_id":5,"category":"general","rate":0.081}, | |
| {"rate_id":106,"jurisdiction_id":6,"category":"general","rate":0.070}]}, | |
| {"name":"transactions","pk":["txn_id"], | |
| "columns":[ | |
| {"name":"txn_id","type":"INTEGER"}, | |
| {"name":"txn_date","type":"TEXT"}, | |
| {"name":"amount","type":"REAL"}, | |
| {"name":"category","type":"TEXT"}, | |
| {"name":"jurisdiction_id","type":"INTEGER"}], | |
| "fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}], | |
| "rows":[ | |
| {"txn_id":201,"txn_date":"2024-01-03","amount":120.00,"category":"general","jurisdiction_id":1}, | |
| {"txn_id":202,"txn_date":"2024-01-05","amount":55.25,"category":"general","jurisdiction_id":2}, | |
| {"txn_id":203,"txn_date":"2024-01-10","amount":300.00,"category":"general","jurisdiction_id":3}, | |
| {"txn_id":204,"txn_date":"2024-02-01","amount":240.55,"category":"general","jurisdiction_id":4}, | |
| {"txn_id":205,"txn_date":"2024-02-14","amount":89.99,"category":"general","jurisdiction_id":5}, | |
| {"txn_id":206,"txn_date":"2024-03-02","amount":150.00,"category":"general","jurisdiction_id":6}, | |
| {"txn_id":207,"txn_date":"2024-03-09","amount":70.00,"category":"general","jurisdiction_id":1}, | |
| {"txn_id":208,"txn_date":"2024-03-15","amount":18.50,"category":"general","jurisdiction_id":2}, | |
| {"txn_id":209,"txn_date":"2024-03-20","amount":99.95,"category":"general","jurisdiction_id":3}, | |
| {"txn_id":210,"txn_date":"2024-03-25","amount":199.99,"category":"general","jurisdiction_id":4}]}] | |
| }, | |
| "questions":[ | |
| {"id":"TX_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"Show all records from `jurisdictions`.", | |
| "answer_sql":["SELECT * FROM jurisdictions;"]}, | |
| {"id":"TX_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"Show `jurisdiction_id`, `rate` from `tax_rates`.", | |
| "answer_sql":["SELECT jurisdiction_id, rate FROM tax_rates;"]}, | |
| {"id":"TX_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"Transactions over $150 (return `txn_id`, `amount`).", | |
| "answer_sql":["SELECT txn_id, amount FROM transactions WHERE amount > 150;"]}, | |
| {"id":"TX_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Join `transactions` (`t`) with `jurisdictions` (`j`), returning `t.txn_id`, `j.name`.", | |
| "answer_sql":["SELECT t.txn_id, j.name FROM transactions t JOIN jurisdictions j ON t.jurisdiction_id=j.jurisdiction_id;"], | |
| "requires_aliases":True,"required_aliases":["t","j"]}, | |
| {"id":"TX_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Compute tax for each transaction: return `txn_id`, `amount*rate` as `tax`.", | |
| "answer_sql":["SELECT t.txn_id, t.amount * r.rate AS tax FROM transactions t INNER JOIN tax_rates r ON t.jurisdiction_id=r.jurisdiction_id AND t.category=r.category;"]}, | |
| {"id":"TX_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"Jurisdictions and count of transactions (include zero). Columns `name`, `txn_count`.", | |
| "answer_sql":["SELECT j.name, COUNT(t.txn_id) AS txn_count FROM jurisdictions j LEFT JOIN transactions t ON j.jurisdiction_id=t.jurisdiction_id GROUP BY j.name;"]}, | |
| {"id":"TX_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create view `vw_high_txn` with transactions amount > 150.", | |
| "answer_sql":["CREATE VIEW vw_high_txn AS SELECT * FROM transactions WHERE amount > 150;"]}, | |
| {"id":"TX_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create table `low_rate` with tax_rates where rate < 0.06.", | |
| "answer_sql":[ | |
| "CREATE TABLE low_rate AS SELECT * FROM tax_rates WHERE rate < 0.06;", | |
| "SELECT * INTO low_rate FROM tax_rates WHERE rate < 0.06;" | |
| ]} | |
| ] | |
| } | |
| # --- Oil & gas wells --- | |
| FALLBACK_PACKS["oil and gas wells"] = { | |
| "schema":{ | |
| "domain":"oil and gas wells", | |
| "tables":[ | |
| {"name":"wells","pk":["well_id"], | |
| "columns":[ | |
| {"name":"well_id","type":"INTEGER"}, | |
| {"name":"well_name","type":"TEXT"}, | |
| {"name":"location","type":"TEXT"}, | |
| {"name":"status","type":"TEXT"}, | |
| {"name":"depth","type":"INTEGER"}], | |
| "fks":[], | |
| "rows":[ | |
| {"well_id":1,"well_name":"Alpha-1","location":"TX-TRV","status":"producing","depth":12000}, | |
| {"well_id":2,"well_name":"Bravo-2","location":"TX-TRV","status":"shut-in","depth":10500}, | |
| {"well_id":3,"well_name":"Cedar-7","location":"OK-CAD","status":"producing","depth":9800}, | |
| {"well_id":4,"well_name":"Delta-3","location":"ND-WIL","status":"drilling","depth":7000}, | |
| {"well_id":5,"well_name":"Eagle-5","location":"CO-DNV","status":"producing","depth":8500}, | |
| {"well_id":6,"well_name":"Fox-9","location":"NM-LEA","status":"producing","depth":11000}]}, | |
| {"name":"operators","pk":["operator_id"], | |
| "columns":[ | |
| {"name":"operator_id","type":"INTEGER"}, | |
| {"name":"name","type":"TEXT"}, | |
| {"name":"contact","type":"TEXT"}], | |
| "fks":[], | |
| "rows":[ | |
| {"operator_id":10,"name":"PetroMax","contact":"pmx@example.com"}, | |
| {"operator_id":11,"name":"BlueRock Energy","contact":"blue@example.com"}, | |
| {"operator_id":12,"name":"HighPlains LLC","contact":"hp@example.com"}, | |
| {"operator_id":13,"name":"Mesa Oil","contact":"mesa@example.com"}]}, | |
| {"name":"well_operators","pk":["well_id","operator_id"], | |
| "columns":[ | |
| {"name":"well_id","type":"INTEGER"}, | |
| {"name":"operator_id","type":"INTEGER"}, | |
| {"name":"start_date","type":"TEXT"}], | |
| "fks":[ | |
| {"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]}, | |
| {"columns":["operator_id"],"ref_table":"operators","ref_columns":["operator_id"]}], | |
| "rows":[ | |
| {"well_id":1,"operator_id":10,"start_date":"2023-01-01"}, | |
| {"well_id":2,"operator_id":10,"start_date":"2023-06-01"}, | |
| {"well_id":3,"operator_id":11,"start_date":"2022-03-15"}, | |
| {"well_id":4,"operator_id":12,"start_date":"2024-02-01"}, | |
| {"well_id":5,"operator_id":13,"start_date":"2022-10-10"}, | |
| {"well_id":6,"operator_id":11,"start_date":"2021-08-05"}]}, | |
| {"name":"production","pk":["prod_id"], | |
| "columns":[ | |
| {"name":"prod_id","type":"INTEGER"}, | |
| {"name":"well_id","type":"INTEGER"}, | |
| {"name":"month","type":"TEXT"}, | |
| {"name":"oil_bbl","type":"REAL"}, | |
| {"name":"gas_mcf","type":"REAL"}], | |
| "fks":[{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]}], | |
| "rows":[ | |
| {"prod_id":1001,"well_id":1,"month":"2024-01","oil_bbl":1200,"gas_mcf":5000}, | |
| {"prod_id":1002,"well_id":1,"month":"2024-02","oil_bbl":1180,"gas_mcf":5100}, | |
| {"prod_id":1003,"well_id":3,"month":"2024-01","oil_bbl":900,"gas_mcf":3000}, | |
| {"prod_id":1004,"well_id":3,"month":"2024-02","oil_bbl":950,"gas_mcf":3100}, | |
| {"prod_id":1005,"well_id":5,"month":"2024-01","oil_bbl":600,"gas_mcf":2200}, | |
| {"prod_id":1006,"well_id":6,"month":"2024-01","oil_bbl":750,"gas_mcf":2600}, | |
| {"prod_id":1007,"well_id":2,"month":"2024-01","oil_bbl":0,"gas_mcf":0}, | |
| {"prod_id":1008,"well_id":4,"month":"2024-02","oil_bbl":100,"gas_mcf":400}]}] | |
| }, | |
| "questions":[ | |
| {"id":"OG_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"List all rows from `wells`.", | |
| "answer_sql":["SELECT * FROM wells;"]}, | |
| {"id":"OG_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"Return `well_name`, `status` from `wells`.", | |
| "answer_sql":["SELECT well_name, status FROM wells;"]}, | |
| {"id":"OG_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"Wells deeper than 10,000 ft (return `well_name`, `depth`).", | |
| "answer_sql":["SELECT well_name, depth FROM wells WHERE depth > 10000;"]}, | |
| {"id":"OG_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Using `w` and `o`, show `w.well_name` with `o.name` (operator).", | |
| "answer_sql":["SELECT w.well_name, o.name FROM wells w JOIN well_operators wo ON w.well_id=wo.well_id JOIN operators o ON wo.operator_id=o.operator_id;"], | |
| "requires_aliases":True,"required_aliases":["w","o"]}, | |
| {"id":"OG_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Join `production` to `wells` and return `well_name`, total `oil_bbl` per well.", | |
| "answer_sql":["SELECT w.well_name, SUM(p.oil_bbl) AS total_oil FROM production p INNER JOIN wells w ON p.well_id=w.well_id GROUP BY w.well_name;"]}, | |
| {"id":"OG_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"List operators and count of wells (include operators with zero). Columns `name`, `well_count`.", | |
| "answer_sql":["SELECT o.name, COUNT(wo.well_id) AS well_count FROM operators o LEFT JOIN well_operators wo ON o.operator_id=wo.operator_id GROUP BY o.name;"]}, | |
| {"id":"OG_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create view `vw_prod_jan` for January 2024 production.", | |
| "answer_sql":["CREATE VIEW vw_prod_jan AS SELECT * FROM production WHERE month='2024-01';"]}, | |
| {"id":"OG_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create table `active_wells` for wells with status='producing'.", | |
| "answer_sql":[ | |
| "CREATE TABLE active_wells AS SELECT * FROM wells WHERE status='producing';", | |
| "SELECT * INTO active_wells FROM wells WHERE status='producing';" | |
| ]} | |
| ] | |
| } | |
| # --- Marketing --- | |
| FALLBACK_PACKS["marketing"] = { | |
| "schema":{ | |
| "domain":"marketing", | |
| "tables":[ | |
| {"name":"channels","pk":["channel_id"], | |
| "columns":[ | |
| {"name":"channel_id","type":"INTEGER"}, | |
| {"name":"channel_name","type":"TEXT"}], | |
| "fks":[], | |
| "rows":[ | |
| {"channel_id":1,"channel_name":"Search"}, | |
| {"channel_id":2,"channel_name":"Social"}, | |
| {"channel_id":3,"channel_name":"Email"}, | |
| {"channel_id":4,"channel_name":"Display"}]}, | |
| {"name":"campaigns","pk":["campaign_id"], | |
| "columns":[ | |
| {"name":"campaign_id","type":"INTEGER"}, | |
| {"name":"campaign_name","type":"TEXT"}, | |
| {"name":"channel_id","type":"INTEGER"}, | |
| {"name":"start_date","type":"TEXT"}, | |
| {"name":"budget","type":"REAL"}], | |
| "fks":[{"columns":["channel_id"],"ref_table":"channels","ref_columns":["channel_id"]}], | |
| "rows":[ | |
| {"campaign_id":101,"campaign_name":"Spring Search","channel_id":1,"start_date":"2024-03-01","budget":5000}, | |
| {"campaign_id":102,"campaign_name":"Brand Social","channel_id":2,"start_date":"2024-03-05","budget":3000}, | |
| {"campaign_id":103,"campaign_name":"Welcome Email","channel_id":3,"start_date":"2024-03-07","budget":1000}, | |
| {"campaign_id":104,"campaign_name":"Retargeting","channel_id":4,"start_date":"2024-03-10","budget":2000}, | |
| {"campaign_id":105,"campaign_name":"Summer Search","channel_id":1,"start_date":"2024-06-01","budget":6000}, | |
| {"campaign_id":106,"campaign_name":"Promo Social","channel_id":2,"start_date":"2024-06-05","budget":3500}]}, | |
| {"name":"ad_stats","pk":["campaign_id","day"], | |
| "columns":[ | |
| {"name":"campaign_id","type":"INTEGER"}, | |
| {"name":"day","type":"TEXT"}, | |
| {"name":"impressions","type":"INTEGER"}, | |
| {"name":"clicks","type":"INTEGER"}, | |
| {"name":"spend","type":"REAL"}], | |
| "fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}], | |
| "rows":[ | |
| {"campaign_id":101,"day":"2024-03-12","impressions":10000,"clicks":500,"spend":200.0}, | |
| {"campaign_id":101,"day":"2024-03-13","impressions":12000,"clicks":600,"spend":230.0}, | |
| {"campaign_id":102,"day":"2024-03-12","impressions":8000,"clicks":400,"spend":150.0}, | |
| {"campaign_id":103,"day":"2024-03-12","impressions":5000,"clicks":250,"spend":80.0}, | |
| {"campaign_id":104,"day":"2024-03-12","impressions":7000,"clicks":210,"spend":110.0}, | |
| {"campaign_id":106,"day":"2024-06-12","impressions":9500,"clicks":520,"spend":190.0}]}, | |
| {"name":"leads","pk":["lead_id"], | |
| "columns":[ | |
| {"name":"lead_id","type":"INTEGER"}, | |
| {"name":"campaign_id","type":"INTEGER"}, | |
| {"name":"source","type":"TEXT"}, | |
| {"name":"qualified","type":"INTEGER"}, | |
| {"name":"revenue","type":"REAL"}], | |
| "fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}], | |
| "rows":[ | |
| {"lead_id":1,"campaign_id":101,"source":"LP1","qualified":1,"revenue":400}, | |
| {"lead_id":2,"campaign_id":101,"source":"LP2","qualified":0,"revenue":0}, | |
| {"lead_id":3,"campaign_id":102,"source":"FB","qualified":1,"revenue":250}, | |
| {"lead_id":4,"campaign_id":103,"source":"Email","qualified":1,"revenue":300}, | |
| {"lead_id":5,"campaign_id":104,"source":"DSP","qualified":0,"revenue":0}, | |
| {"lead_id":6,"campaign_id":106,"source":"FB","qualified":1,"revenue":500}]}] | |
| }, | |
| "questions":[ | |
| {"id":"M_Q01","category":"SELECT *","difficulty":1, | |
| "prompt_md":"Show all channels.", | |
| "answer_sql":["SELECT * FROM channels;"]}, | |
| {"id":"M_Q02","category":"SELECT columns","difficulty":1, | |
| "prompt_md":"Return `campaign_name`, `budget` from `campaigns`.", | |
| "answer_sql":["SELECT campaign_name, budget FROM campaigns;"]}, | |
| {"id":"M_Q03","category":"WHERE","difficulty":1, | |
| "prompt_md":"Campaigns with budget β₯ 3000 (show `campaign_name`, `budget`).", | |
| "answer_sql":["SELECT campaign_name, budget FROM campaigns WHERE budget >= 3000;"]}, | |
| {"id":"M_Q04","category":"Aliases","difficulty":1, | |
| "prompt_md":"Join `campaigns` (`c`) with `channels` (`ch`) and show `c.campaign_name`, `ch.channel_name`.", | |
| "answer_sql":["SELECT c.campaign_name, ch.channel_name FROM campaigns c JOIN channels ch ON c.channel_id=ch.channel_id;"], | |
| "requires_aliases":True,"required_aliases":["c","ch"]}, | |
| {"id":"M_Q05","category":"JOIN (INNER)","difficulty":2, | |
| "prompt_md":"Join `ad_stats` with `campaigns` and return `campaign_name`, total `clicks`.", | |
| "answer_sql":["SELECT c.campaign_name, SUM(s.clicks) AS total_clicks FROM ad_stats s INNER JOIN campaigns c ON s.campaign_id=c.campaign_id GROUP BY c.campaign_name;"]}, | |
| {"id":"M_Q06","category":"JOIN (LEFT)","difficulty":2, | |
| "prompt_md":"Channels and number of campaigns (include channels with zero). Columns `channel_name`, `campaigns`.", | |
| "answer_sql":["SELECT ch.channel_name, COUNT(c.campaign_id) AS campaigns FROM channels ch LEFT JOIN campaigns c ON ch.channel_id=c.channel_id GROUP BY ch.channel_name;"]}, | |
| {"id":"M_Q07","category":"VIEW","difficulty":2, | |
| "prompt_md":"Create view `vw_cost_per_click` with `campaign_id`, `day`, `spend/clicks` as `cpc` (avoid divide-by-zero).", | |
| "answer_sql":["CREATE VIEW vw_cost_per_click AS SELECT campaign_id, day, CASE WHEN clicks=0 THEN NULL ELSE spend*1.0/clicks END AS cpc FROM ad_stats;"]}, | |
| {"id":"M_Q08","category":"CTAS / SELECT INTO","difficulty":2, | |
| "prompt_md":"Create table `qualified_leads` of leads where `qualified=1`.", | |
| "answer_sql":[ | |
| "CREATE TABLE qualified_leads AS SELECT * FROM leads WHERE qualified=1;", | |
| "SELECT * INTO qualified_leads FROM leads WHERE qualified=1;" | |
| ]} | |
| ] | |
| } | |
| # Helpers to get a pack by domain (normalize key) | |
| def get_fallback_pack_for(domain_name: str) -> Tuple[Dict[str,Any], List[Dict[str,Any]]]: | |
| key = (domain_name or "").strip().lower() | |
| for k, pack in FALLBACK_PACKS.items(): | |
| if k.lower() == key: | |
| return pack["schema"], pack["questions"] | |
| # default to bookstore | |
| p = FALLBACK_PACKS["bookstore"] | |
| return p["schema"], p["questions"] | |
| # -------------------- OpenAI prompts + parsing helpers -------------------- | |
| ALLOWED_DOMAINS = ["bookstore", "retail sales", "wholesaler", "sales tax", "oil and gas wells", "marketing"] | |
| DOMAIN_CYCLE_POS = 0 # will be set after first install | |
| def _domain_prompt(prev_domain: Optional[str], preferred_domain: Optional[str]) -> str: | |
| target = preferred_domain or "bookstore" | |
| extra = f" (previous domain was '{prev_domain}', do not reuse it)" if prev_domain else "" | |
| return f""" | |
| Return ONLY a valid JSON object (no markdown, no prose). | |
| You MUST set the top-level property "domain" to EXACTLY "{target}" (string match).{extra} | |
| The JSON must have: | |
| - "domain": "{target}" | |
| - "tables": 3β4 table objects | |
| - "questions": 8β12 question objects | |
| Tables: | |
| - SQLite-friendly. Use snake_case. | |
| - Each table: name, pk (list), columns (list of {{name,type}}), fks (list of {{columns,ref_table,ref_columns}}), rows (8β15 small seed rows). | |
| Questions: | |
| - Categories among: "SELECT *", "SELECT columns", "WHERE", "Aliases", | |
| "JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO". | |
| - Include at least one LEFT JOIN, one VIEW, one CTAS or SELECT INTO. | |
| - Provide 1β3 'answer_sql' strings per question. | |
| - Prefer SQLite-compatible SQL. Do NOT use RIGHT/FULL OUTER JOIN. | |
| """ | |
| def _loose_json_parse(s: str) -> Optional[dict]: | |
| try: | |
| return json.loads(s) | |
| except Exception: | |
| pass | |
| start = s.find("{"); end = s.rfind("}") | |
| if start != -1 and end != -1 and end > start: | |
| try: | |
| return json.loads(s[start:end+1]) | |
| except Exception: | |
| return None | |
| return None | |
| _SQL_FENCE = re.compile(r"```sql(.*?)```", re.IGNORECASE | re.DOTALL) | |
| _CODE_FENCE = re.compile(r"```(.*?)```", re.DOTALL) | |
| def _strip_code_fences(txt: str) -> str: | |
| if txt is None: return "" | |
| m = _SQL_FENCE.findall(txt) | |
| if m: return "\n".join([x.strip() for x in m if x.strip()]) | |
| m2 = _CODE_FENCE.findall(txt) | |
| if m2: return "\n".join([x.strip() for x in m2 if x.strip()]) | |
| return txt.strip() | |
| def _as_list_of_sql(val) -> List[str]: | |
| if val is None: return [] | |
| if isinstance(val, str): | |
| s = _strip_code_fences(val) | |
| parts = [p.strip() for p in s.split("\n") if p.strip()] | |
| return parts or ([s] if s else []) | |
| if isinstance(val, list): | |
| out = [] | |
| for v in val: | |
| if isinstance(v, str): | |
| s = _strip_code_fences(v) | |
| if s: out.append(s) | |
| return out | |
| return [] | |
| def _canon_question(q: Dict[str, Any]) -> Optional[Dict[str, Any]]: | |
| if not isinstance(q, dict): return None | |
| cat = q.get("category") or q.get("type") or q.get("topic") | |
| prompt = q.get("prompt_md") or q.get("prompt") or q.get("question") or q.get("text") | |
| answer_sql = q.get("answer_sql") or q.get("answers") or q.get("solutions") or q.get("sql") | |
| diff = q.get("difficulty") or 1 | |
| req_alias = bool(q.get("requires_aliases", False)) | |
| req_aliases = q.get("required_aliases") or [] | |
| cat = str(cat).strip() if cat is not None else "" | |
| prompt = str(prompt).strip() if prompt is not None else "" | |
| answers = _as_list_of_sql(answer_sql) | |
| if not cat or not prompt or not answers: return None | |
| known = { | |
| "SELECT *","SELECT columns","WHERE","Aliases", | |
| "JOIN (INNER)","JOIN (LEFT)","Aggregation","VIEW","CTAS / SELECT INTO" | |
| } | |
| if cat not in known: | |
| low = cat.lower() | |
| if "select *" in low: cat = "SELECT *" | |
| elif "columns" in low: cat = "SELECT columns" | |
| elif "where" in low or "filter" in low: cat = "WHERE" | |
| elif "alias" in low: cat = "Aliases" | |
| elif "left" in low: cat = "JOIN (LEFT)" | |
| elif "inner" in low or "join" in low: cat = "JOIN (INNER)" | |
| elif "agg" in low or "group" in low: cat = "Aggregation" | |
| elif "view" in low: cat = "VIEW" | |
| elif "into" in low or "ctas" in low: cat = "CTAS / SELECT INTO" | |
| if isinstance(req_aliases, str): | |
| req_aliases = [a.strip() for a in re.split(r"[,\s]+", req_aliases) if a.strip()] | |
| elif not isinstance(req_aliases, list): | |
| req_aliases = [] | |
| # Soften ambiguous aggregation prompts so users know what to return | |
| if cat == "Aggregation": | |
| pl = (prompt or "").lower() | |
| if ("for each" in pl or "per " in pl) and not re.search(r"`[^`]+`", prompt or ""): | |
| prompt += "\n\n*Note:* Return the grouping column and the aggregate. If the wording says βfor each β¦β, include entities with zero by using a LEFT JOIN." | |
| return { | |
| "id": str(q.get("id") or f"LLM_{int(time.time()*1000)}_{random.randint(100,999)}"), | |
| "category": cat, | |
| "difficulty": int(diff) if str(diff).isdigit() else 1, | |
| "prompt_md": prompt, | |
| "answer_sql": answers, | |
| "requires_aliases": bool(req_alias), | |
| "required_aliases": req_aliases, | |
| } | |
| def _canon_tables(tables: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | |
| out = [] | |
| for t in (tables or []): | |
| if not isinstance(t, dict): continue | |
| name = str(t.get("name","")).strip() | |
| if not name: continue | |
| cols = t.get("columns") or [] | |
| good_cols = [] | |
| for c in cols: | |
| if not isinstance(c, dict): continue | |
| cname = str(c.get("name","")).strip() | |
| ctype = str(c.get("type","TEXT")).strip() or "TEXT" | |
| if cname: good_cols.append({"name": cname, "type": ctype}) | |
| if not good_cols: continue | |
| pk = t.get("pk") or [] | |
| if isinstance(pk, str): pk = [pk] | |
| fks = t.get("fks") or [] | |
| rows = t.get("rows") or [] | |
| out.append({ | |
| "name": name, | |
| "pk": [str(x) for x in pk], | |
| "columns": good_cols, | |
| "fks": fks if isinstance(fks, list) else [], | |
| "rows": rows if isinstance(rows, list) else [], | |
| }) | |
| return out | |
| def llm_generate_domain_and_questions(prev_domain: Optional[str], preferred_domain: Optional[str]): | |
| if not OPENAI_AVAILABLE or not os.getenv("OPENAI_API_KEY"): | |
| return None, "OpenAI client not available or OPENAI_API_KEY missing.", None, {"accepted_questions":0,"dropped_questions":0} | |
| errors = [] | |
| prompt = _domain_prompt(prev_domain, preferred_domain) | |
| for model in _candidate_models(): | |
| try: | |
| try: | |
| chat = _client.chat.completions.create( | |
| model=model, | |
| messages=[{"role":"user","content": prompt}], | |
| temperature=0.6, | |
| response_format={"type":"json_object"} | |
| ) | |
| data_text = chat.choices[0].message.content | |
| except TypeError: | |
| chat = _client.chat.completions.create( | |
| model=model, | |
| messages=[{"role":"system","content":"Return ONLY a JSON object. No markdown."}, | |
| {"role":"user","content": prompt}], | |
| temperature=0.6 | |
| ) | |
| data_text = chat.choices[0].message.content | |
| obj_raw = _loose_json_parse(data_text or "") | |
| if not obj_raw: | |
| raise RuntimeError("Could not parse JSON from model output.") | |
| for k in ["domain","tables","questions"]: | |
| if k not in obj_raw: | |
| raise RuntimeError(f"Missing key '{k}'") | |
| # Force domain to preferred | |
| if preferred_domain and (str(obj_raw.get("domain","")).strip().lower() != preferred_domain.strip().lower()): | |
| raise RuntimeError(f"Model returned domain '{obj_raw.get('domain')}', expected '{preferred_domain}'.") | |
| tables = _canon_tables(obj_raw.get("tables", [])) | |
| if not tables: raise RuntimeError("No usable tables in LLM output.") | |
| obj_raw["tables"] = tables | |
| dropped = 0 | |
| clean_qs = [] | |
| for q in obj_raw.get("questions", []): | |
| cq = _canon_question(q) | |
| if not cq: dropped += 1; continue | |
| answers = [a for a in cq["answer_sql"] if " right join " not in a.lower() and " full " not in a.lower()] | |
| if not answers: dropped += 1; continue | |
| cq["answer_sql"] = answers | |
| clean_qs.append(cq) | |
| if not clean_qs: | |
| raise RuntimeError("No usable questions after canonicalization.") | |
| stats = {"accepted_questions": len(clean_qs), "dropped_questions": dropped} | |
| obj_raw["questions"] = clean_qs | |
| return obj_raw, None, model, stats | |
| except Exception as e: | |
| errors.append(f"{model}: {e}") | |
| continue | |
| return None, "; ".join(errors) if errors else "Unknown LLM error.", None, {"accepted_questions":0,"dropped_questions":0} | |
| # -------------------- Install schema & prepare questions -------------------- | |
| def drop_existing_domain_tables(con: sqlite3.Connection, keep_internal=True): | |
| with DB_LOCK: | |
| cur = con.cursor() | |
| cur.execute("SELECT name, type FROM sqlite_master WHERE type IN ('table','view')") | |
| items = cur.fetchall() | |
| for name, typ in items: | |
| if keep_internal and name in ("users","attempts","session_meta"): | |
| continue | |
| try: | |
| cur.execute(f"DROP {typ.upper()} IF EXISTS {name}") | |
| except Exception: | |
| pass | |
| con.commit() | |
| def install_schema(con: sqlite3.Connection, schema: Dict[str,Any]): | |
| drop_existing_domain_tables(con, keep_internal=True) | |
| with DB_LOCK: | |
| cur = con.cursor() | |
| for t in schema.get("tables", []): | |
| cols_sql = [] | |
| pk = t.get("pk", []) | |
| for c in t.get("columns", []): | |
| cols_sql.append(f"{c['name']} {c.get('type','TEXT')}") | |
| if pk: cols_sql.append(f"PRIMARY KEY ({', '.join(pk)})") | |
| create_sql = f"CREATE TABLE {t['name']} ({', '.join(cols_sql)})" | |
| cur.execute(create_sql) | |
| for t in schema.get("tables", []): | |
| if not t.get("rows"): continue | |
| cols = [c["name"] for c in t.get("columns", [])] | |
| qmarks = ",".join(["?"]*len(cols)) | |
| insert_sql = f"INSERT INTO {t['name']} ({', '.join(cols)}) VALUES ({qmarks})" | |
| for r in t["rows"]: | |
| if isinstance(r, dict): vals = [r.get(col, None) for col in cols] | |
| elif isinstance(r, (list, tuple)): | |
| vals = list(r) + [None]*(len(cols)-len(r)); vals = vals[:len(cols)] | |
| else: continue | |
| cur.execute(insert_sql, vals) | |
| con.commit() | |
| cur.execute("INSERT OR REPLACE INTO session_meta(id, domain, schema_json) VALUES (1, ?, ?)", | |
| (schema.get("domain","unknown"), json.dumps(schema))) | |
| con.commit() | |
| def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str], preferred_domain: str): | |
| obj, err, model_used, stats = llm_generate_domain_and_questions(prev_domain, preferred_domain) | |
| if obj is None: | |
| schema, questions = get_fallback_pack_for(preferred_domain) | |
| info = {"source":"fallback","model":None,"error":err,"accepted":0,"dropped":0} | |
| return schema, questions, info | |
| return obj, obj["questions"], {"source":"openai","model":model_used,"error":None,"accepted":stats["accepted_questions"],"dropped":stats["dropped_questions"]} | |
| def install_schema_and_prepare_questions(prev_domain: Optional[str], preferred_domain: str): | |
| schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain, preferred_domain) | |
| install_schema(CONN, schema) | |
| if not questions: | |
| schema2, questions2 = get_fallback_pack_for(preferred_domain) | |
| install_schema(CONN, schema2) | |
| schema, questions, info = schema2, questions2, {"source":"fallback","model":None,"error":"No usable questions from LLM","accepted":0,"dropped":0} | |
| return schema, questions, info | |
| # -------------------- Domain cycling -------------------- | |
| def _norm(s: str) -> str: | |
| return (s or "").strip().lower() | |
| def _index_of_domain(name: str) -> int: | |
| low = _norm(name) | |
| for i, d in enumerate(ALLOWED_DOMAINS): | |
| if _norm(d) == low: | |
| return i | |
| return 0 | |
| def choose_next_domain(prev_domain: Optional[str]) -> str: | |
| global DOMAIN_CYCLE_POS | |
| if prev_domain is None: | |
| DOMAIN_CYCLE_POS = 0 | |
| return ALLOWED_DOMAINS[DOMAIN_CYCLE_POS] | |
| idx = _index_of_domain(prev_domain) | |
| DOMAIN_CYCLE_POS = (idx + 1) % len(ALLOWED_DOMAINS) | |
| return ALLOWED_DOMAINS[DOMAIN_CYCLE_POS] | |
| # -------------------- Initialize first domain -------------------- | |
| CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=None, preferred_domain="bookstore") | |
| DOMAIN_CYCLE_POS = _index_of_domain(CURRENT_SCHEMA.get("domain","bookstore")) | |
| # -------------------- Progress + mastery -------------------- | |
| def upsert_user(con: sqlite3.Connection, user_id: str, name: str): | |
| with DB_LOCK: | |
| cur = con.cursor() | |
| cur.execute("SELECT user_id FROM users WHERE user_id = ?", (user_id,)) | |
| if cur.fetchone() is None: | |
| cur.execute("INSERT INTO users (user_id, name, created_at) VALUES (?, ?, ?)", | |
| (user_id, name, datetime.now(timezone.utc).isoformat())) | |
| else: | |
| cur.execute("UPDATE users SET name=? WHERE user_id=?", (name, user_id)) | |
| con.commit() | |
| CATEGORIES_ORDER = [ | |
| "SELECT *", "SELECT columns", "WHERE", "Aliases", | |
| "JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO" | |
| ] | |
| def topic_stats(df_attempts: pd.DataFrame) -> pd.DataFrame: | |
| rows = [] | |
| for cat in CATEGORIES_ORDER: | |
| sub = df_attempts[df_attempts["category"] == cat] if not df_attempts.empty else pd.DataFrame() | |
| att = int(sub.shape[0]) if not sub.empty else 0 | |
| cor = int(sub["correct"].sum()) if not sub.empty else 0 | |
| acc = float(cor / max(att, 1)) | |
| rows.append({"category":cat,"attempts":att,"correct":cor,"accuracy":acc}) | |
| return pd.DataFrame(rows) | |
| def fetch_attempts(con: sqlite3.Connection, user_id: str) -> pd.DataFrame: | |
| with DB_LOCK: | |
| return pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", con, params=(user_id,)) | |
| def pick_next_question(user_id: str) -> Dict[str,Any]: | |
| pool = CURRENT_QS if CURRENT_QS else get_fallback_pack_for(CURRENT_SCHEMA.get("domain","bookstore"))[1] | |
| df = fetch_attempts(CONN, user_id) | |
| stats = topic_stats(df) | |
| stats = stats.sort_values(by=["accuracy","attempts"], ascending=[True, True]) if not stats.empty else stats | |
| weakest = stats.iloc[0]["category"] if not stats.empty else CATEGORIES_ORDER[0] | |
| cands = [q for q in pool if str(q.get("category","")).strip() == weakest] or pool | |
| return dict(random.choice(cands)) | |
| # -------------------- SQL execution & grading -------------------- | |
| def run_df(con: sqlite3.Connection, sql: str) -> pd.DataFrame: | |
| with DB_LOCK: | |
| return pd.read_sql_query(sql, con) | |
| def rewrite_select_into(sql: str) -> Tuple[str, Optional[str]]: | |
| s = sql.strip().strip(";") | |
| if re.search(r"\bselect\b.+\binto\b.+\bfrom\b", s, flags=re.IGNORECASE|re.DOTALL): | |
| m = re.match(r"(?is)^\s*select\s+(.*?)\s+into\s+([A-Za-z_][A-Za-z0-9_]*)\s+from\s+(.*)$", s) | |
| if m: | |
| cols, tbl, rest = m.groups() | |
| return f"CREATE TABLE {tbl} AS SELECT {cols} FROM {rest}", tbl | |
| return sql, None | |
| def detect_unsupported_joins(sql: str) -> Optional[str]: | |
| low = sql.lower() | |
| if " right join " in low: | |
| return "SQLite does not support RIGHT JOIN. Use LEFT JOIN in the opposite direction." | |
| if " full join " in low or " full outer join " in low: | |
| return "SQLite does not support FULL OUTER JOIN. Use LEFT JOIN plus UNION." | |
| if " ilike " in low: | |
| return "SQLite has no ILIKE. Use LOWER(col) LIKE LOWER('%pattern%')." | |
| return None | |
| def detect_cartesian(con: sqlite3.Connection, sql: str, df_result: pd.DataFrame) -> Optional[str]: | |
| low = sql.lower() | |
| if " cross join " in low: return "Query uses CROSS JOIN (cartesian product). Ensure this is intended." | |
| comma_from = re.search(r"\bfrom\b\s+([a-z_]\w*)\s*,\s*([a-z_]\w*)", low) | |
| missing_on = (" join " in low) and (" on " not in low) and (" using " not in low) and (" natural " not in low) | |
| if comma_from or missing_on: | |
| try: | |
| with DB_LOCK: | |
| cur = con.cursor() | |
| if comma_from: t1, t2 = comma_from.groups() | |
| else: | |
| m = re.search(r"\bfrom\b\s+([a-z_]\w*)", low); j = re.search(r"\bjoin\b\s+([a-z_]\w*)", low) | |
| if not m or not j: return "Possible cartesian product: no join condition detected." | |
| t1, t2 = m.group(1), j.group(1) | |
| cur.execute(f"SELECT COUNT(*) FROM {t1}"); n1 = cur.fetchone()[0] | |
| cur.execute(f"SELECT COUNT(*) FROM {t2}"); n2 = cur.fetchone()[0] | |
| prod = n1 * n2 | |
| if len(df_result) == prod and prod > 0: | |
| return f"Result row count equals {n1}Γ{n2}={prod}. Likely cartesian product (missing join)." | |
| except Exception: | |
| return "Possible cartesian product: no join condition detected." | |
| return None | |
| # Column enforcement policy β relax unless prompt makes it explicit | |
| def should_enforce_columns(q: Dict[str, Any]) -> bool: | |
| """ | |
| Enforce exact projection only when the prompt explicitly asks for columns, | |
| or when DDL creates a new object where column shape matters. | |
| """ | |
| cat = (q.get("category") or "").strip() | |
| if cat in ("SELECT columns", "VIEW", "CTAS / SELECT INTO"): | |
| return True | |
| prompt = (q.get("prompt_md") or "") | |
| prompt_l = prompt.lower() | |
| # Signals that projection is explicitly requested | |
| if re.search(r"`[^`]+`", prompt): # backticked names | |
| return True | |
| if re.search(r"\((?:show|return|display)[^)]+\)", prompt_l): | |
| return True | |
| if re.search(r"\b(show|return|display|select)\b[^.]{0,120}\b(columns?|fields?|name|title|price|count|sum|avg|max|min)\b", prompt_l): | |
| return True | |
| return False | |
| def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame: | |
| out = df.copy() | |
| out.columns = [str(c).strip().lower() for c in out.columns] | |
| return out | |
| def results_equal_or_superset(df_student: pd.DataFrame, df_expected: pd.DataFrame) -> Tuple[bool, Optional[str]]: | |
| a = _normalize_columns(df_student); b = _normalize_columns(df_expected) | |
| if set(a.columns) == set(b.columns): | |
| a2 = a[sorted(a.columns)].sort_values(sorted(a.columns)).reset_index(drop=True) | |
| b2 = b[sorted(a.columns)].sort_values(sorted(a.columns)).reset_index(drop=True) | |
| return (a2.equals(b2), None) | |
| if set(b.columns).issubset(set(a.columns)): | |
| a_proj = a[b.columns] | |
| a2 = a_proj.sort_values(list(b.columns)).reset_index(drop=True) | |
| b2 = b.sort_values(list(b.columns)).reset_index(drop=True) | |
| if a2.equals(b2): | |
| return True, "extra_columns" | |
| return False, None | |
| def results_equal_rowcount_only(df_student: pd.DataFrame, df_expected: pd.DataFrame) -> bool: | |
| return df_student.shape[0] == df_expected.shape[0] | |
| def aliases_present(sql: str, required_aliases: List[str]) -> bool: | |
| low = re.sub(r"\s+", " ", (sql or "").lower()) | |
| for al in (required_aliases or []): | |
| if f" {al}." not in low and f" as {al} " not in low: | |
| return False | |
| return True | |
| def exec_student_sql(sql_text: str): | |
| if not sql_text or not sql_text.strip(): | |
| return None, "Enter a SQL statement.", None, None | |
| sql_raw = sql_text.strip().rstrip(";") | |
| sql_rew, created_tbl = rewrite_select_into(sql_raw) | |
| note = "Rewrote `SELECT ... INTO` to `CREATE TABLE ... AS SELECT ...` for SQLite." if sql_rew != sql_raw else None | |
| unsup = detect_unsupported_joins(sql_rew) | |
| if unsup: return None, unsup, None, note | |
| try: | |
| low = sql_rew.lower() | |
| if low.startswith("select"): | |
| df = run_df(CONN, sql_rew) | |
| warn = detect_cartesian(CONN, sql_rew, df) | |
| return df, None, warn, note | |
| else: | |
| with DB_LOCK: | |
| cur = CONN.cursor() | |
| cur.execute(sql_rew); CONN.commit() | |
| if low.startswith("create view"): | |
| m = re.match(r"(?is)^\s*create\s+view\s+(if\s+not\s+exists\s+)?([a-z_]\w*)\s+as\s+(select.*)$", low) | |
| name = m.group(2) if m else None | |
| if name: | |
| try: return pd.read_sql_query(f"SELECT * FROM {name}", CONN), None, None, note | |
| except Exception: return None, "View created but could not be queried.", None, note | |
| if low.startswith("create table"): | |
| tbl = created_tbl | |
| if not tbl: | |
| m = re.match(r"(?is)^\s*create\s+table\s+(if\s+not\s+exists\s+)?([a-z_]\w*)\s+as\s+select.*$", low) | |
| tbl = m.group(2) if m else None | |
| if tbl: | |
| try: return pd.read_sql_query(f"SELECT * FROM {tbl}", CONN), None, None, note | |
| except Exception: return None, "Table created but could not be queried.", None, note | |
| return pd.DataFrame(), None, None, note | |
| except Exception as e: | |
| msg = str(e) | |
| if "no such table" in msg.lower(): return None, f"{msg}. Check table names for this randomized domain.", None, note | |
| if "no such column" in msg.lower(): return None, f"{msg}. Use correct column names or prefixes (alias.column).", None, note | |
| if "ambiguous column name" in msg.lower(): return None, f"{msg}. Qualify the column with a table alias.", None, note | |
| if "misuse of aggregate" in msg.lower() or "aggregate functions are not allowed in" in msg.lower(): | |
| return None, f"{msg}. You might need a GROUP BY for non-aggregated columns.", None, note | |
| if "near \"into\"" in msg.lower() and "syntax error" in msg.lower(): | |
| return None, "SQLite doesnβt support `SELECT ... INTO`. I can rewrite it automaticallyβtry again.", None, note | |
| if "syntax error" in msg.lower(): | |
| return None, f"Syntax error. Check commas, keywords, parentheses. Raw error: {msg}", None, note | |
| return None, f"SQL error: {msg}", None, note | |
| def answer_df(answer_sql: List[str]) -> Optional[pd.DataFrame]: | |
| for sql in answer_sql: | |
| try: | |
| low = sql.strip().lower() | |
| if low.startswith("select"): return run_df(CONN, sql) | |
| if low.startswith("create view"): | |
| m = re.match(r"(?is)^\s*create\s+view\s+(if\s+not\s+exists\s+)?([a-z_]\w*)\s+as\s+select.*$", low) | |
| view_name = m.group(2) if m else "vw_tmp" | |
| with DB_LOCK: | |
| cur = CONN.cursor() | |
| cur.execute(f"DROP VIEW IF EXISTS {view_name}") | |
| cur.execute(sql); CONN.commit() | |
| return run_df(CONN, f"SELECT * FROM {view_name}") | |
| if low.startswith("create table"): | |
| m = re.match(r"(?is)^\s*create\s+table\s+(if\s+not\s+exists\s+)?([a-z_]\w*)\s+as\s+select.*$", low) | |
| tbl = m.group(2) if m else None | |
| with DB_LOCK: | |
| cur = CONN.cursor() | |
| if tbl: cur.execute(f"DROP TABLE IF EXISTS {tbl}") | |
| cur.execute(sql); CONN.commit() | |
| if tbl: return run_df(CONN, f"SELECT * FROM {tbl}") | |
| except Exception: | |
| continue | |
| return None | |
| def _canonical_expects_left_join(q: Dict[str, Any]) -> bool: | |
| canon = " ".join(q.get("answer_sql", [])).lower() | |
| return " left join " in canon | |
| def validate_answer(q: Dict[str,Any], student_sql: str, df_student: Optional[pd.DataFrame]) -> Tuple[bool, str]: | |
| df_expected = answer_df(q["answer_sql"]) | |
| if df_expected is None: | |
| return (df_student is not None), f"**Explanation:** Your statement executed successfully for this task." | |
| if df_student is None: | |
| return False, f"**Explanation:** Expected data result differs." | |
| enforce_cols = should_enforce_columns(q) | |
| if enforce_cols: | |
| ok, note = results_equal_or_superset(df_student, df_expected) | |
| if ok and note == "extra_columns": | |
| return True, "**Note:** You returned extra columns. The rows match; try selecting only the requested columns next time." | |
| if ok: | |
| return True, "**Explanation:** Your result matches a canonical solution." | |
| return False, f"**Explanation:** Compare your result to a canonical solution." | |
| else: | |
| # Projection not enforced β compare coverage (row counts) | |
| if df_student.shape[0] == df_expected.shape[0]: | |
| return True, "**Explanation:** Columns werenβt specified for this task; row count matches the canonical answer." | |
| if df_student.shape[0] < df_expected.shape[0] and _canonical_expects_left_join(q): | |
| return False, "**Hint:** This task expects *every* entity (including those with zero related rows). Use a `LEFT JOIN` from the dimension table." | |
| return False, "**Explanation:** For this task we compared row counts (projection not enforced) and they didnβt match." | |
| def log_attempt(user_id: str, qid: str, category: str, correct: bool, sql_text: str, | |
| time_taken: float, difficulty: int, source: str, notes: str): | |
| with DB_LOCK: | |
| cur = CONN.cursor() | |
| cur.execute(""" | |
| INSERT INTO attempts (user_id, question_id, category, correct, sql_text, timestamp, time_taken, difficulty, source, notes) | |
| VALUES (?,?,?,?,?,?,?,?,?,?) | |
| """, (user_id, qid, category, int(correct), sql_text, datetime.now(timezone.utc).isoformat(), | |
| time_taken, difficulty, source, notes)) | |
| CONN.commit() | |
| # -------------------- UI callbacks -------------------- | |
| def start_session(name: str, session: dict): | |
| name = (name or "").strip() | |
| if not name: | |
| return (session, | |
| gr.update(value="Please enter your name to begin.", visible=True), | |
| gr.update(visible=False), | |
| gr.update(visible=False), | |
| draw_dynamic_erd(CURRENT_SCHEMA), | |
| gr.update(visible=False), | |
| pd.DataFrame(), | |
| pd.DataFrame()) | |
| slug = "-".join(name.lower().split()) | |
| user_id = slug[:64] if slug else f"user-{int(time.time())}" | |
| upsert_user(CONN, user_id, name) | |
| q = pick_next_question(user_id) | |
| session = {"user_id": user_id, "name": name, "qid": q["id"], "start_ts": time.time(), "q": q} | |
| prompt = q["prompt_md"] | |
| stats = topic_stats(fetch_attempts(CONN, user_id)) | |
| erd = draw_dynamic_erd(CURRENT_SCHEMA) | |
| return (session, | |
| gr.update(value=f"**Question {q['id']}**\n\n{prompt}", visible=True), | |
| gr.update(visible=True), | |
| gr.update(value="", visible=True), | |
| erd, | |
| gr.update(visible=False), | |
| stats, | |
| pd.DataFrame()) | |
| def render_preview(sql_text: str, session: dict): | |
| if not session or "q" not in session: | |
| return gr.update(value="", visible=False), draw_dynamic_erd(CURRENT_SCHEMA) | |
| s = (sql_text or "").strip() | |
| if not s: | |
| return gr.update(value="", visible=False), draw_dynamic_erd(CURRENT_SCHEMA) | |
| hi_tables, hi_edges = sql_highlights(s, CURRENT_SCHEMA) | |
| erd = draw_dynamic_erd(CURRENT_SCHEMA, highlight_tables=hi_tables, highlight_edges=hi_edges) | |
| html = highlight_sql_html(s) | |
| return gr.update(value=html, visible=True), erd | |
| def submit_answer(sql_text: str, session: dict): | |
| if not session or "user_id" not in session or "q" not in session: | |
| return gr.update(value="Start a session first.", visible=True), pd.DataFrame(), gr.update(visible=False), pd.DataFrame() | |
| user_id = session["user_id"] | |
| q = session["q"] | |
| elapsed = max(0.0, time.time() - session.get("start_ts", time.time())) | |
| df, err, warn, note = exec_student_sql(sql_text) | |
| details = [] | |
| if note: details.append(f"βΉοΈ {note}") | |
| if err: | |
| fb = f"β **Did not run**\n\n{err}" | |
| if details: fb += "\n\n" + "\n".join(details) | |
| log_attempt(user_id, q.get("id","?"), q.get("category","?"), False, sql_text, elapsed, int(q.get("difficulty",1)), "bank", " | ".join([err] + details)) | |
| stats = topic_stats(fetch_attempts(CONN, user_id)) | |
| return gr.update(value=fb, visible=True), pd.DataFrame(), gr.update(visible=True), stats | |
| alias_msg = None | |
| if q.get("requires_aliases") and not aliases_present(sql_text, q.get("required_aliases", [])): | |
| alias_msg = f"β οΈ This task asked for aliases {q.get('required_aliases', [])}. I didnβt detect them." | |
| is_correct, explanation = validate_answer(q, sql_text, df) | |
| if warn: details.append(f"β οΈ {warn}") | |
| if alias_msg: details.append(alias_msg) | |
| prefix = "β **Correct!**" if is_correct else "β **Not quite.**" | |
| feedback = prefix | |
| if details: feedback += "\n\n" + "\n".join(details) | |
| feedback += "\n\n" + explanation + "\n\n**One acceptable solution:**\n```sql\n" + q["answer_sql"][0].rstrip(";") + ";\n```" | |
| log_attempt(user_id, q["id"], q.get("category","?"), bool(is_correct), sql_text, elapsed, int(q.get("difficulty",1)), "bank", " | ".join(details)) | |
| stats = topic_stats(fetch_attempts(CONN, user_id)) | |
| return gr.update(value=feedback, visible=True), (df if df is not None else pd.DataFrame()), gr.update(visible=True), stats | |
| def next_question(session: dict): | |
| if not session or "user_id" not in session: | |
| return session, gr.update(value="Start a session first.", visible=True), gr.update(visible=False), draw_dynamic_erd(CURRENT_SCHEMA), gr.update(visible=False) | |
| user_id = session["user_id"] | |
| q = pick_next_question(user_id) | |
| session["qid"] = q["id"]; session["q"] = q; session["start_ts"] = time.time() | |
| return session, gr.update(value=f"**Question {q['id']}**\n\n{q['prompt_md']}", visible=True), gr.update(value="", visible=True), draw_dynamic_erd(CURRENT_SCHEMA), gr.update(visible=False) | |
| def show_hint(session: dict): | |
| if not session or "q" not in session: | |
| return gr.update(value="Start a session first.", visible=True) | |
| cat = session["q"].get("category","?") | |
| hint = { | |
| "SELECT *": "Use `SELECT * FROM table_name`.", | |
| "SELECT columns": "List columns: `SELECT col1, col2 FROM table_name`.", | |
| "WHERE": "Filter with `WHERE` and combine conditions using AND/OR.", | |
| "Aliases": "Use `table_name t` and qualify as `t.col`.", | |
| "JOIN (INNER)": "Join with `... INNER JOIN ... ON left.key = right.key`.", | |
| "JOIN (LEFT)": "LEFT JOIN keeps all rows from the left table.", | |
| "Aggregation": "Use aggregates and `GROUP BY` non-aggregated columns.", | |
| "VIEW": "`CREATE VIEW view_name AS SELECT ...`.", | |
| "CTAS / SELECT INTO": "SQLite uses `CREATE TABLE name AS SELECT ...`." | |
| }.get(cat, "Identify keys from the schema and join on them.") | |
| return gr.update(value=f"**Hint:** {hint}", visible=True) | |
| def export_progress(user_name: str): | |
| slug = "-".join((user_name or "").lower().split()) | |
| if not slug: return None | |
| user_id = slug[:64] | |
| with DB_LOCK: | |
| df = pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", CONN, params=(user_id,)) | |
| os.makedirs(EXPORT_DIR, exist_ok=True) | |
| path = os.path.abspath(os.path.join(EXPORT_DIR, f"{user_id}_progress.csv")) | |
| (pd.DataFrame([{"info":"No attempts yet."}]) if df.empty else df).to_csv(path, index=False) | |
| return path | |
| def _domain_status_md(): | |
| if CURRENT_INFO.get("source","openai") == "openai": | |
| accepted = CURRENT_INFO.get("accepted",0); dropped = CURRENT_INFO.get("dropped",0) | |
| return (f"β **Domain via OpenAI** `{CURRENT_INFO.get('model','?')}` β **{CURRENT_SCHEMA.get('domain','?')}**. " | |
| f"Accepted questions: {accepted}, dropped: {dropped}. \n" | |
| f"Tables: {', '.join(t['name'] for t in CURRENT_SCHEMA.get('tables', []))}.") | |
| err = CURRENT_INFO.get("error",""); err_short = (err[:160] + "β¦") if len(err) > 160 else err | |
| return f"β οΈ **OpenAI randomization unavailable** β using fallback **{CURRENT_SCHEMA.get('domain','?')}**.\n\n> Reason: {err_short}" | |
| def list_tables_for_preview(): | |
| df = run_df(CONN, """ | |
| SELECT name FROM sqlite_master | |
| WHERE type in ('table','view') | |
| AND name NOT LIKE 'sqlite_%' | |
| AND name NOT IN ('users','attempts','session_meta') | |
| ORDER BY type, name | |
| """) | |
| return df["name"].tolist() if not df.empty else ["(no tables)"] | |
| # Always reseed a question on randomize (creates a guest session if needed) | |
| def regenerate_domain(session: dict): | |
| global CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO | |
| prev = CURRENT_SCHEMA.get("domain") if CURRENT_SCHEMA else None | |
| preferred = choose_next_domain(prev) | |
| CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=prev, preferred_domain=preferred) | |
| erd = draw_dynamic_erd(CURRENT_SCHEMA) | |
| status = _domain_status_md() | |
| # Ensure a session (guest if needed) | |
| if not session or not session.get("user_id"): | |
| user_id = f"guest-{int(time.time())}" | |
| upsert_user(CONN, user_id, "Guest") | |
| session = {"user_id": user_id, "name": "Guest", "qid": None, "start_ts": time.time(), "q": None} | |
| # Seed next question for this session | |
| q = pick_next_question(session["user_id"]) | |
| session.update({"qid": q["id"], "q": q, "start_ts": time.time()}) | |
| stats = topic_stats(fetch_attempts(CONN, session["user_id"])) | |
| empty_df = pd.DataFrame() | |
| dd_update = gr.update(choices=list_tables_for_preview(), value=None) | |
| return ( | |
| gr.update(value=status, visible=True), # regen_fb | |
| erd, # er_image | |
| gr.update(value=f"**Question {q['id']}**\n\n{q['prompt_md']}", visible=True), # prompt_md | |
| gr.update(value="", visible=True), # sql_input | |
| dd_update, # tbl_dd | |
| stats, # mastery_df | |
| empty_df, # result_df | |
| session # session_state | |
| ) | |
| def preview_table(tbl: str): | |
| try: | |
| if not tbl or tbl=="(no tables)": | |
| return pd.DataFrame() | |
| return run_df(CONN, f"SELECT * FROM {tbl} LIMIT 20") | |
| except Exception as e: | |
| return pd.DataFrame([{"error": str(e)}]) | |
| # -------------------- UI -------------------- | |
| SQL_PREVIEW_STYLES = """ | |
| <style> | |
| :root { | |
| --sqlkw-core: #1f6feb; /* SELECT, FROM, WHERE */ | |
| --sqlkw-join: #b80672; /* JOIN family */ | |
| --sqlkw-group: #107c41; /* GROUP BY / HAVING */ | |
| --sqlkw-order: #6f42c1; /* ORDER BY / LIMIT / OFFSET */ | |
| --sqlkw-ddl: #b26c12; /* CREATE / INSERT / UPDATE / DELETE */ | |
| } | |
| .sql-preview pre { white-space: pre-wrap; margin: 0; } | |
| .sql-preview code { | |
| font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace; | |
| font-size: 13px; | |
| } | |
| .sql-preview .kw { font-weight: 700; } | |
| .sql-preview .kw-core { color: var(--sqlkw-core) !important; } | |
| .sql-preview .kw-join { color: var(--sqlkw-join) !important; } | |
| .sql-preview .kw-group { color: var(--sqlkw-group) !important; } | |
| .sql-preview .kw-order { color: var(--sqlkw-order) !important; } | |
| .sql-preview .kw-ddl { color: var(--sqlkw-ddl) !important; } | |
| </style> | |
| """ | |
| with gr.Blocks(title="Adaptive SQL Trainer β Randomized Domains") as demo: | |
| gr.HTML(SQL_PREVIEW_STYLES) | |
| gr.Markdown( | |
| """ | |
| # π§ͺ Adaptive SQL Trainer β Randomized Domains (SQLite) | |
| - Uses **OpenAI** (if configured) to randomize a domain (bookstore, retail sales, wholesaler, | |
| sales tax, oil & gas wells, marketing), generate **3β4 tables** and **8β12** questions. | |
| - Practice `SELECT`, `WHERE`, `JOIN` (INNER/LEFT), **aliases**, **views**, and **CTAS / SELECT INTO**. | |
| - **ERD shows tables only** and bolds the ones referenced in your SQL. | |
| """ | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| name_box = gr.Textbox(label="Your Name", placeholder="e.g., Jordan Alvarez") | |
| start_btn = gr.Button("Start / Resume Session", variant="primary") | |
| session_state = gr.State({"user_id": None, "name": None, "qid": None, "start_ts": None, "q": None}) | |
| gr.Markdown("---") | |
| gr.Markdown("### Dataset Controls") | |
| regen_btn = gr.Button("π Randomize Dataset (OpenAI)") | |
| regen_fb = gr.Markdown(_domain_status_md(), visible=True) | |
| gr.Markdown("---") | |
| gr.Markdown("### Instructor Tools") | |
| export_name = gr.Textbox(label="Export a student's progress (enter name)") | |
| export_btn = gr.Button("Export CSV") | |
| export_file = gr.File(label="Download progress") | |
| gr.Markdown("---") | |
| gr.Markdown("### Quick Table/View Preview (top 20 rows)") | |
| tbl_dd = gr.Dropdown(choices=list_tables_for_preview(), label="Pick table/view", interactive=True) | |
| tbl_btn = gr.Button("Preview") | |
| preview_df = gr.Dataframe(value=pd.DataFrame(), interactive=False) | |
| with gr.Column(scale=2): | |
| prompt_md = gr.Markdown(visible=False) | |
| sql_input = gr.Textbox(label="Your SQL", placeholder="Type SQL here (end ; optional).", lines=6, visible=False) | |
| preview_md = gr.HTML(visible=False) | |
| er_image = gr.Image(label="Entity Diagram", value=draw_dynamic_erd(CURRENT_SCHEMA), height=PLOT_HEIGHT) | |
| with gr.Row(): | |
| submit_btn = gr.Button("Run & Submit", variant="primary") | |
| hint_btn = gr.Button("Hint") | |
| next_btn = gr.Button("Next Question βΆ", visible=False) | |
| feedback_md = gr.Markdown("") | |
| gr.Markdown("---") | |
| gr.Markdown("### Your Progress by Category") | |
| mastery_df = gr.Dataframe(headers=["category","attempts","correct","accuracy"], | |
| col_count=(4,"dynamic"), row_count=(0,"dynamic"), interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### Result Preview") | |
| result_df = gr.Dataframe(value=pd.DataFrame(), interactive=False) | |
| start_btn.click( | |
| start_session, | |
| inputs=[name_box, session_state], | |
| outputs=[session_state, prompt_md, sql_input, preview_md, er_image, next_btn, mastery_df, result_df], | |
| ) | |
| sql_input.change( | |
| render_preview, | |
| inputs=[sql_input, session_state], | |
| outputs=[preview_md, er_image], | |
| ) | |
| submit_btn.click( | |
| submit_answer, | |
| inputs=[sql_input, session_state], | |
| outputs=[feedback_md, result_df, next_btn, mastery_df], | |
| ) | |
| next_btn.click( | |
| next_question, | |
| inputs=[session_state], | |
| outputs=[session_state, prompt_md, sql_input, er_image, next_btn], | |
| ) | |
| hint_btn.click( | |
| show_hint, | |
| inputs=[session_state], | |
| outputs=[feedback_md], | |
| ) | |
| export_btn.click( | |
| export_progress, | |
| inputs=[export_name], | |
| outputs=[export_file], | |
| ) | |
| regen_btn.click( | |
| regenerate_domain, | |
| inputs=[session_state], | |
| outputs=[regen_fb, er_image, prompt_md, sql_input, tbl_dd, mastery_df, result_df, session_state], | |
| ) | |
| tbl_btn.click( | |
| preview_table, | |
| inputs=[tbl_dd], | |
| outputs=[preview_df] | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |