hjerpe commited on
Commit
5dd1bb4
·
verified ·
1 Parent(s): 34d2fe8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. AGENTS.md +145 -0
  2. CLAUDE.md +61 -0
  3. Dockerfile +85 -0
  4. GEMINI.md +62 -0
  5. README.md +135 -5
  6. REVIEW_REPORT.md +57 -0
  7. __init__.py +36 -0
  8. client.py +140 -0
  9. conftest.py +3 -0
  10. data/__init__.py +1 -0
  11. data/databases/__init__.py +1 -0
  12. data/databases/models.py +153 -0
  13. data/questions/db_list.json +12 -0
  14. data/questions/questions_eval.json +0 -0
  15. data/questions/questions_train.json +0 -0
  16. data/questions/student_assessment.json +3355 -0
  17. docs/ARCHITECTURE.md +361 -0
  18. docs/README.md +41 -0
  19. docs/RUNBOOK.md +10 -0
  20. docs/blog-outline.md +56 -0
  21. docs/design-docs/decisions/0001-template.md +26 -0
  22. docs/design-docs/index.md +57 -0
  23. docs/guides/README.md +24 -0
  24. docs/learnings/F007-architecture.md +1 -0
  25. docs/learnings/F007-conventions.md +2 -0
  26. docs/learnings/F007-gotchas.md +2 -0
  27. docs/learnings/F007-integrations.md +2 -0
  28. docs/learnings/F007-security.md +1 -0
  29. docs/learnings/F007-testing.md +1 -0
  30. docs/learnings/F007-workflow.md +1 -0
  31. docs/references/README.md +5 -0
  32. evaluation/__init__.py +11 -0
  33. evaluation/green_agent.py +199 -0
  34. models.py +272 -0
  35. notebooks/train_grpo.ipynb +226 -0
  36. opencode.jsonc +283 -0
  37. openenv.yaml +6 -0
  38. progress.log +29 -0
  39. pyproject.toml +69 -0
  40. scripts/curate_questions.py +921 -0
  41. scripts/download_spider_data.py +106 -0
  42. scripts/download_spider_databases.py +301 -0
  43. scripts/generate_models_from_schema.py +294 -0
  44. server/__init__.py +5 -0
  45. server/app.py +110 -0
  46. server/install_deps.sh +12 -0
  47. server/requirements.txt +6 -0
  48. server/reward.py +185 -0
  49. server/sql_environment.py +635 -0
  50. server/synthetic/__init__.py +25 -0
AGENTS.md ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Map (AGENTS.md)
2
+
3
+ This file is a navigation map for agents. Durable knowledge lives in `docs/`.
4
+
5
+ ## Start Here
6
+
7
+ - Docs index: [docs/README.md](docs/README.md)
8
+ - Architecture: [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
9
+ - Operations: [docs/RUNBOOK.md](docs/RUNBOOK.md)
10
+ - Test: `uv run pytest tests/ -v`
11
+
12
+ ## System-of-Record Documents
13
+
14
+ | Category | Location | Type | Purpose |
15
+ |----------|----------|------|---------|
16
+ | Guides | [docs/guides/README.md](docs/guides/README.md) | how-to | Practical procedures |
17
+ | Design docs | [docs/design-docs/index.md](docs/design-docs/index.md) | explanation | Feature design, ADRs |
18
+ | References | [docs/references/README.md](docs/references/README.md) | reference | External docs |
19
+
20
+ ## Project Structure
21
+
22
+ This project follows the [OpenEnv](https://github.com/meta-pytorch/OpenEnv) `openenv init` convention.
23
+ The project root **is** the environment package — no `envs/` nesting.
24
+
25
+ ```
26
+ sql-env/ # project root = environment package
27
+ ├── __init__.py # exports SQLAction, SQLObservation, SQLEnvClient
28
+ ├── models.py # Pydantic models (action w/ tokens, observation w/ messages, state)
29
+ ├── client.py # SQLEnvClient(EnvClient) — WebSocket client w/ tensor serialization
30
+ ├── conftest.py # pytest config (ignores __init__.py collection)
31
+ ├── openenv.yaml # OpenEnv manifest
32
+ ├── pyproject.toml # deps + package config (setuptools, torch, transformers)
33
+ ├── .python-version # pins Python 3.12
34
+ ├── data/
35
+ │ ├── databases/
36
+ │ │ └── models.py # SQLAlchemy ORM models (student_assessment)
37
+ │ └── questions/
38
+ │ └── student_assessment.json # 30+ Spider Q&A pairs with gold SQL
39
+ ├── server/
40
+ │ ├── app.py # FastAPI app (tokenizer factory, MockTokenizer fallback)
41
+ │ ├── sql_environment.py # SQLEnvironment(Environment) — core logic + Ollama
42
+ │ ├── test_sql_env.py # MockTokenizer (char-code encoding for dev/test)
43
+ │ ├── reward.py # Reward computation (stub — Phase 3)
44
+ │ ├── verifier.py # Answer comparison (stub — Phase 3)
45
+ │ ├── Dockerfile
46
+ │ ├── requirements.txt
47
+ │ └── install_deps.sh # Docker setup script
48
+ ├── scripts/
49
+ │ ├── download_spider_data.py # Download Spider questions from HuggingFace
50
+ │ └── generate_models_from_schema.py # Auto-generate SQLAlchemy models
51
+ ├── tests/
52
+ │ └── test_smoke.py # 21 tests (models, env, actions, client, schema)
53
+ ├── docs/ # Design docs, architecture
54
+ └── AGENTS.md
55
+ ```
56
+
57
+ ## Guardrails
58
+
59
+ - **Testing:** Use the package manager (`uv run pytest ...`), never bare `pytest`.
60
+ - **Git safety:** No destructive commands (`reset --hard`, `push --force`) unless explicit.
61
+ - **Secrets:** Never commit `.env` or credentials.
62
+
63
+ ## Quick Commands
64
+
65
+ | Task | Command |
66
+ |------|---------|
67
+ | Install | `uv sync` |
68
+ | Lint | `uv run ruff check --fix .` |
69
+ | Format | `uv run ruff format .` |
70
+ | Test | `uv run pytest tests/ -v` |
71
+ | Run server | `uv run uvicorn server.app:app --reload` |
72
+ | Validate env | `uv run openenv validate --verbose` |
73
+ | Build Docker | `uv run openenv build` |
74
+ | Push to HF | `uv run openenv push` |
75
+
76
+ ## Development Workflow
77
+
78
+ - Run via package manager (`uv run ...`), never bare commands.
79
+ - List existing files before creating new ones (avoid naming drift).
80
+ - Prefer vertical slices over horizontal refactors.
81
+ - No premature abstraction until multiple use-cases require it.
82
+
83
+ <!-- GUIDELINES-BEGIN -->
84
+
85
+ ## Delivery Safety (Move Fast Without Breaking Things)
86
+
87
+ Move fast by taking the smallest responsible step that produces real feedback, while pre-committing to guardrails so being wrong is survivable.
88
+
89
+ - **Small batches:** Prefer vertical slices and small PRs; reduce blast radius and review/debug time.
90
+ - **Define "broken" first:** Before shipping, write down what you will watch (errors, latency, correctness, cost) and the abort threshold.
91
+ - **Design for reversibility:** Make changes easy to turn off, roll back, or ignore.
92
+
93
+ ## System Boundaries (Avoid Analysis Paralysis)
94
+
95
+ Systems are continuous webs; plans require artificial boundaries.
96
+
97
+ - **Boundary rule:** Include only variables/components that could change the decision you are making.
98
+ - **Clouds:** Treat everything else as exogenous inputs; track them as risks/assumptions.
99
+ - **Timebox mapping:** If the landscape is moving faster than you can model it, run a probe (spike, canary, A/B) instead.
100
+
101
+ ## Maturity Modes
102
+
103
+ Match guardrails to maturity:
104
+
105
+ - **Exploratory:** Learning > durability. Prefer spikes; avoid irreversible state changes; manual verification is OK; expect throwaway code.
106
+ - **MVP:** Ship a thin end-to-end slice. Manual checks are OK, but you still need a fast rollback path and bounded impact.
107
+ - **Production:** Build to last. Automated tests, observability, progressive rollout, and explicit rollback/incident posture.
108
+
109
+ Expect limiting factors to move as you ship: fix the current bottleneck, then re-diagnose the next.
110
+
111
+ ## Progressive Delivery
112
+
113
+ - **Feature flags:** Use flags to make risky changes reversible. Categorize flags (release/experiment/ops/permissioning).
114
+ - **Flags are inventory:** Every flag needs an owner, an expiry, and a removal plan.
115
+ - **Canary/ramp when risk is non-trivial:** Start small, watch signals, ramp gradually; prefer "flip off" over redeploy.
116
+
117
+ ## Reliability Control Loop (If You Run Production)
118
+
119
+ - **SLO + error budget:** If you are within budget, keep shipping; if you burn budget, freeze non-critical changes and pay down reliability.
120
+
121
+ ## Avoid
122
+
123
+ - Big-bang releases, long-lived branches, unowned flags, flaky tests, and alert noise.
124
+
125
+ ## Python Guidelines
126
+
127
+ - Prefer type hints for public APIs; use `typing` / `collections.abc`.
128
+ - Use NumPy-style docstrings; keep them synced with type hints.
129
+ - Error handling: Use specific exceptions; avoid `try: ... except Exception: pass`.
130
+ - Dependencies: Use `uv add <package>`; do not manually edit `pyproject.toml`.
131
+
132
+ ## Docs Expectations
133
+
134
+ - Keep durable design/ops knowledge in `docs/` (architecture, runbook, decisions). Keep AGENTS.md as a short map, not an encyclopedia.
135
+
136
+ ## Testing Standards
137
+
138
+ - **Always use the project's package manager** to run tests. Never invoke test runners directly.
139
+ - Python (uv): `uv run pytest tests/ -v` (NEVER bare `pytest`)
140
+ - Python (poetry): `poetry run pytest tests/ -v`
141
+ - Node: `npm test` or `npm run test`
142
+ - Rust: `cargo test`
143
+ - **Rationale:** Bare `pytest` bypasses the virtualenv and may use the wrong Python/dependencies. Package managers ensure the correct environment. Bare invocations also trigger unnecessary permission prompts in automated workflows.
144
+
145
+ <!-- GUIDELINES-END -->
CLAUDE.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Map (AGENTS.md)
2
+
3
+ This file is a navigation map for agents. Durable knowledge lives in `docs/`.
4
+
5
+ ## Start Here
6
+
7
+ - Docs index: [docs/README.md](docs/README.md)
8
+ - Architecture: [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
9
+ - Operations: [docs/RUNBOOK.md](docs/RUNBOOK.md)
10
+ - Validate: `opencode-ctx docs validate`
11
+ - Test: `uv run pytest tests/ -v`
12
+
13
+ ## System-of-Record Documents
14
+
15
+ | Category | Location | Type | Purpose |
16
+ |----------|----------|------|---------|
17
+ | Guides | [docs/guides/README.md](docs/guides/README.md) | how-to | Practical procedures |
18
+ | Design docs | [docs/design-docs/index.md](docs/design-docs/index.md) | explanation | Feature design, ADRs |
19
+ | Core beliefs | [docs/design-docs/core-beliefs.md](docs/design-docs/core-beliefs.md) | explanation | Agent-first principles |
20
+ | Learnings | [docs/learnings/README.md](docs/learnings/README.md) | reference | Durable patterns |
21
+ | Exec plans | [docs/exec-plans/README.md](docs/exec-plans/README.md) | how-to | Complex work tracking |
22
+ | Discovery | [docs/discovery/index.md](docs/discovery/index.md) | explanation | Validate + Taste |
23
+ | Delivery specs | [docs/delivery-specs/index.md](docs/delivery-specs/index.md) | reference | Engineering handoff |
24
+ | References | [docs/references/README.md](docs/references/README.md) | reference | External docs |
25
+ | Exploration | [docs/exploration/README.md](docs/exploration/README.md) | exploration | Ideas, scratchpad |
26
+ | Taxonomy | [docs/DOCS_TAXONOMY.md](docs/DOCS_TAXONOMY.md) | reference | Where to put new docs |
27
+ | Quality | [docs/QUALITY_SCORE.md](docs/QUALITY_SCORE.md) | reference | Domain grades |
28
+
29
+ ## Guardrails
30
+
31
+ - **Testing:** Use the package manager (`uv run pytest ...`), never bare `pytest`.
32
+ - **Skills:** Call `skill({ name: "<name>" })` first when asked to use a skill.
33
+ - **Config:** Project config in `opencode.jsonc` (repo root); `.opencode/` holds project agents/commands; global fallback in `~/.config/opencode/`.
34
+ - **Git safety:** No destructive commands (`reset --hard`, `push --force`) unless explicit.
35
+ - **Secrets:** Never commit `.env` or credentials.
36
+
37
+ ## Quick Commands
38
+
39
+ | Task | Command |
40
+ |------|---------|
41
+ | Install | `uv sync` |
42
+ | Docs validate | `opencode-ctx docs validate` |
43
+ | Arch snapshot | `opencode-ctx docs architecture apply` |
44
+ | Lint | `uv run ruff check --fix .` |
45
+ | Format | `uv run ruff format .` |
46
+ | Test | `uv run pytest tests/ -v` |
47
+ | Run | `uv run python -m <module>` |
48
+
49
+ ## Development Workflow
50
+
51
+ - Run via package manager (`uv run ...`), never bare commands.
52
+ - List existing files before creating new ones (avoid naming drift).
53
+ - Prefer vertical slices over horizontal refactors.
54
+ - No premature abstraction until multiple use-cases require it.
55
+
56
+ <!-- GUIDELINES-BEGIN -->
57
+
58
+ <!-- Managed by: opencode-ctx guidelines apply --packs python,testing,delivery-safety -->
59
+ <!-- Run the command above to populate this section -->
60
+
61
+ <!-- GUIDELINES-END -->
Dockerfile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-stage build using openenv-base
2
+ # Works for both in-repo and standalone environments.
3
+ # The build script (openenv build) handles context detection.
4
+
5
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
6
+ FROM ${BASE_IMAGE} AS builder
7
+
8
+ WORKDIR /app
9
+
10
+ # Ensure git is available (required for VCS dependencies)
11
+ RUN apt-get update && \
12
+ apt-get install -y --no-install-recommends git && \
13
+ rm -rf /var/lib/apt/lists/*
14
+
15
+ ARG BUILD_MODE=in-repo
16
+ ARG ENV_NAME=sql_env
17
+ # Set to https://download.pytorch.org/whl/cpu for CPU-only (default, smaller image)
18
+ # Set to "" for full CUDA support (GPU deployment)
19
+ ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
20
+
21
+ # Copy environment code
22
+ COPY . /app/env
23
+
24
+ WORKDIR /app/env
25
+
26
+ # Ensure uv is available
27
+ RUN if ! command -v uv >/dev/null 2>&1; then \
28
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
29
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
30
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
31
+ fi
32
+
33
+ # Install dependencies (TORCH_INDEX controls CPU vs CUDA PyTorch)
34
+ RUN --mount=type=cache,target=/root/.cache/uv \
35
+ export UV_PROJECT_ENVIRONMENT=/app/.venv && \
36
+ if [ -n "${TORCH_INDEX}" ]; then export UV_EXTRA_INDEX_URL="${TORCH_INDEX}"; fi && \
37
+ if [ -f uv.lock ]; then \
38
+ uv sync --frozen --no-install-project --no-editable; \
39
+ else \
40
+ uv sync --no-install-project --no-editable; \
41
+ fi
42
+
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ export UV_PROJECT_ENVIRONMENT=/app/.venv && \
45
+ if [ -n "${TORCH_INDEX}" ]; then export UV_EXTRA_INDEX_URL="${TORCH_INDEX}"; fi && \
46
+ if [ -f uv.lock ]; then \
47
+ uv sync --frozen --no-editable; \
48
+ else \
49
+ uv sync --no-editable; \
50
+ fi
51
+
52
+ # Final runtime stage
53
+ FROM ${BASE_IMAGE}
54
+
55
+ WORKDIR /app
56
+
57
+ # Default port (HF Spaces overrides with PORT=7860)
58
+ ENV PORT=8000
59
+
60
+ # Copy the virtual environment from builder
61
+ COPY --from=builder /app/.venv /app/.venv
62
+
63
+ # Copy the environment code
64
+ COPY --from=builder /app/env /app/env
65
+
66
+ # Explicitly copy bundled Spider databases for deployment checks
67
+ COPY --from=builder /app/env/data/databases /app/env/data/databases
68
+
69
+ # Set PATH to use the virtual environment
70
+ ENV PATH="/app/.venv/bin:$PATH"
71
+
72
+ # Set PYTHONPATH so imports work correctly
73
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
74
+
75
+ # Run as non-root for HF Spaces security best practice
76
+ RUN useradd --create-home --uid 10001 appuser
77
+ USER appuser
78
+
79
+ # Health check verifies bundled DBs and API health
80
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
81
+ CMD sh -c 'find /app/env/data/databases -name "*.sqlite" -print -quit | grep -q . && curl -f "http://localhost:${PORT:-8000}/health"' || exit 1
82
+
83
+ # Run the FastAPI server
84
+ ENV ENABLE_WEB_INTERFACE=true
85
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port ${PORT:-8000}"]
GEMINI.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Map (AGENTS.md)
2
+
3
+ This file is a navigation map for agents. Durable knowledge lives in `docs/`.
4
+
5
+ ## Start Here
6
+
7
+ - Docs index: [docs/README.md](docs/README.md)
8
+ - Architecture: [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
9
+ - Operations: [docs/RUNBOOK.md](docs/RUNBOOK.md)
10
+ - Validate: `opencode-ctx docs validate`
11
+ - Test: `uv run pytest tests/ -v`
12
+
13
+ ## System-of-Record Documents
14
+
15
+ | Category | Location | Type | Purpose |
16
+ |----------|----------|------|---------|
17
+ | Guides | [docs/guides/README.md](docs/guides/README.md) | how-to | Practical procedures |
18
+ | Design docs | [docs/design-docs/index.md](docs/design-docs/index.md) | explanation | Feature design, ADRs |
19
+ | Core beliefs | [docs/design-docs/core-beliefs.md](docs/design-docs/core-beliefs.md) | explanation | Agent-first principles |
20
+ | Learnings | [docs/learnings/README.md](docs/learnings/README.md) | reference | Durable patterns |
21
+ | Exec plans | [docs/exec-plans/README.md](docs/exec-plans/README.md) | how-to | Complex work tracking |
22
+ | Discovery | [docs/discovery/index.md](docs/discovery/index.md) | explanation | Validate + Taste |
23
+ | Delivery specs | [docs/delivery-specs/index.md](docs/delivery-specs/index.md) | reference | Engineering handoff |
24
+ | References | [docs/references/README.md](docs/references/README.md) | reference | External docs |
25
+ | Exploration | [docs/exploration/README.md](docs/exploration/README.md) | exploration | Ideas, scratchpad |
26
+ | Taxonomy | [docs/DOCS_TAXONOMY.md](docs/DOCS_TAXONOMY.md) | reference | Where to put new docs |
27
+ | Quality | [docs/QUALITY_SCORE.md](docs/QUALITY_SCORE.md) | reference | Domain grades |
28
+
29
+ ## Guardrails
30
+
31
+ - **Testing:** Use the package manager (`uv run pytest ...`), never bare `pytest`.
32
+ - **Skills:** Call `skill({ name: "<name>" })` first when asked to use a skill.
33
+ - **Config:** Project config in `opencode.jsonc` (repo root); `.opencode/` holds project agents/commands; global fallback in `~/.config/opencode/`.
34
+ - **Git safety:** No destructive commands (`reset --hard`, `push --force`) unless explicit.
35
+ - **Secrets:** Never commit `.env` or credentials.
36
+
37
+ ## Quick Commands
38
+
39
+ | Task | Command |
40
+ |------|---------|
41
+ | Install | `uv sync` |
42
+ | Init project | `opencode-ctx docs init` (scaffolds docs, config, git hooks) |
43
+ | Docs validate | `opencode-ctx docs validate` |
44
+ | Arch snapshot | `opencode-ctx docs architecture apply` |
45
+ | Lint | `uv run ruff check --fix .` |
46
+ | Format | `uv run ruff format .` |
47
+ | Test | `uv run pytest tests/ -v` |
48
+ | Run | `uv run python -m <module>` |
49
+
50
+ ## Development Workflow
51
+
52
+ - Run via package manager (`uv run ...`), never bare commands.
53
+ - List existing files before creating new ones (avoid naming drift).
54
+ - Prefer vertical slices over horizontal refactors.
55
+ - No premature abstraction until multiple use-cases require it.
56
+
57
+ <!-- GUIDELINES-BEGIN -->
58
+
59
+ <!-- Managed by: opencode-ctx guidelines apply --packs python,testing,delivery-safety -->
60
+ <!-- Run the command above to populate this section -->
61
+
62
+ <!-- GUIDELINES-END -->
README.md CHANGED
@@ -1,10 +1,140 @@
1
  ---
2
- title: Sql Env
3
- emoji: 🌍
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SQLEnv
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ base_path: /web
9
  ---
10
 
11
+ # SQLEnv: Teaching Agents to Explore Databases
12
+
13
+ ![Python](https://img.shields.io/badge/python-3.12-blue.svg)
14
+ ![License](https://img.shields.io/badge/license-MIT-green.svg)
15
+
16
+ SQLEnv is an interactive RL environment for text-to-SQL reasoning. Instead of producing one-shot SQL, agents learn to think like data analysts: inspect schema, sample rows, run exploratory queries, and submit a final answer with confidence.
17
+
18
+ Built for the [OpenEnv Challenge](https://github.com/meta-pytorch/OpenEnv), this project packages the environment runtime, dense rewards, evaluation, and training hooks so others can reproduce results and iterate quickly.
19
+
20
+ ## Quick Start
21
+
22
+ Run these three commands to install, validate, and smoke-test the environment:
23
+
24
+ ```bash
25
+ uv sync
26
+ uv run openenv validate --verbose
27
+ uv run pytest tests/ -v
28
+ ```
29
+
30
+ Local server run:
31
+
32
+ ```bash
33
+ uv run uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
34
+ ```
35
+
36
+ Docker run:
37
+
38
+ ```bash
39
+ docker build -t sql-env:latest -f server/Dockerfile .
40
+ docker run -p 8000:8000 sql-env:latest
41
+ ```
42
+
43
+ ## Why SQLEnv
44
+
45
+ Static text-to-SQL benchmarks reward final outputs, not reasoning quality. SQLEnv turns SQL generation into an interactive decision process with feedback at each step, making it suitable for RL training and behavior analysis.
46
+
47
+ ## Architecture
48
+
49
+ ```text
50
+ +-------------+ WebSocket +----------------------+ SQLite
51
+ | RL Agent | <------------------> | SQLEnvClient | <----------------+
52
+ | (GRPO/TRL) | | (client.py) | |
53
+ +-------------+ +----------+-----------+ |
54
+ HTTP/WebSocket |
55
+ | |
56
+ v |
57
+ +--------------------------+ |
58
+ | FastAPI Server | |
59
+ | (server.app:app) | |
60
+ +------------+-------------+ |
61
+ | |
62
+ v |
63
+ +--------------------------+ |
64
+ | SQLEnvironment |------------+
65
+ | step/reset/reward/verify |
66
+ +--------------------------+
67
+ ```
68
+
69
+ ## How It Works
70
+
71
+ Each episode begins with a natural language question mapped to a hidden Spider database. The agent acts through four environment actions:
72
+
73
+ | Action | Purpose | Typical Output |
74
+ |--------|---------|----------------|
75
+ | `DESCRIBE table_name` | Inspect schema and column metadata | Column names, types, row count |
76
+ | `SAMPLE table_name` | Inspect representative rows | Small row sample |
77
+ | `QUERY sql_string` | Execute read-only SQL in sandbox | Query result rows or SQL error |
78
+ | `ANSWER value` | Submit final answer | Terminal reward and completion |
79
+
80
+ Episode flow:
81
+ 1. `reset()` returns question context and available tables.
82
+ 2. `step()` executes one exploration action at a time.
83
+ 3. `ANSWER` ends the episode with correctness-based terminal reward.
84
+
85
+ ## Train an Agent
86
+
87
+ Use the GRPO training pipeline artifacts from F006 and run the notebook workflow:
88
+
89
+ - Notebook: `notebooks/train_grpo.ipynb`
90
+ - Training support modules: `training/`
91
+ - Evaluation utilities: `evaluation/`
92
+
93
+ This setup is designed for Colab and local CPU/GPU environments.
94
+
95
+ ## HuggingFace Space
96
+
97
+ - Live Space: `https://huggingface.co/spaces/<your-org-or-user>/sql-env` (update after push)
98
+ - Health check: `curl https://<space-url>/health`
99
+ - Deploy command: `uv run openenv push`
100
+
101
+ ## Project Structure
102
+
103
+ ```text
104
+ sql-env/
105
+ |- __init__.py
106
+ |- client.py
107
+ |- models.py
108
+ |- openenv.yaml
109
+ |- server/
110
+ | |- app.py
111
+ | |- sql_environment.py
112
+ | |- reward.py
113
+ | |- verifier.py
114
+ | `- Dockerfile
115
+ |- data/
116
+ | |- databases/
117
+ | `- questions/
118
+ |- training/
119
+ |- evaluation/
120
+ |- notebooks/
121
+ | `- train_grpo.ipynb
122
+ |- specs/
123
+ |- docs/
124
+ `- tests/
125
+ ```
126
+
127
+ ## Deployment Checklist
128
+
129
+ 1. `uv run openenv validate --verbose`
130
+ 2. `uv run openenv build`
131
+ 3. `uv run openenv push`
132
+ 4. Verify `/health` and run one full episode through the client.
133
+
134
+ ## Links
135
+
136
+ - OpenEnv framework: https://github.com/meta-pytorch/OpenEnv
137
+ - OpenEnv docs: https://meta-pytorch.org/OpenEnv/
138
+ - Spider dataset: https://huggingface.co/datasets/xlangai/spider
139
+ - TRL OpenEnv docs: https://huggingface.co/docs/trl/openenv
140
+ - Verification plan: `specs/F007-VERIFICATION_SPEC.md`
REVIEW_REPORT.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code Review Report: F006 Step 3.1 (`notebooks/train_grpo.ipynb`, `pyproject.toml`, `tests/e2e/test_training_e2e.py`)
2
+
3
+ **Risk Tier:** Medium
4
+ **Status:** Failed
5
+ **Verdict:** BLOCK
6
+
7
+ ## Summary
8
+
9
+ Step 3.1 is not ready to merge. The training extra currently resolves to a TRL version incompatible with the repo’s pinned Torch version, causing notebook imports to fail before training can start. In addition, the added E2E test only validates notebook structure and does not exercise the required one-step training smoke flow from the verification spec.
10
+
11
+ ## Evidence
12
+
13
+ ### Tests
14
+ - **Status:** Passed (limited scope)
15
+ - **Command:** `uv run --with pytest pytest tests/e2e/test_training_e2e.py -v`
16
+ - **Results:** `2 passed, 0 failed`
17
+
18
+ ### Dependency/Runtime Validation
19
+ - **Status:** Failed
20
+ - **Command:** `uv run --extra training python -c "from trl import GRPOConfig, GRPOTrainer; print('ok')"`
21
+ - **Observed:** Import error (`cannot import name 'FSDPModule'`) in TRL with current Torch pin.
22
+
23
+ ### Security (Medium)
24
+ - **Status:** Clear
25
+ - **Checks:** Medium-tier quick checks only (no secrets/auth/unsafe execution patterns introduced in scoped changes).
26
+
27
+ ## Issues
28
+
29
+ ### Critical
30
+ 1. **Training extra resolves to incompatible TRL, breaking notebook startup**
31
+ - **Location:** `pyproject.toml:30-33`, `notebooks/train_grpo.ipynb:29-35`
32
+ - **Problem:** `training = ["trl>=0.12.0", "accelerate>=0.34.0"]` permits latest TRL (installed as 0.29.1), which fails to import with pinned `torch==2.2.2`.
33
+ - **Impact:** Notebook cannot run end-to-end (“one click” success criterion fails before training).
34
+ - **Fix:** Pin a TRL range compatible with Torch 2.2.2 (or upgrade Torch accordingly), then add/import-check coverage in tests.
35
+
36
+ ### Important
37
+ 1. **E2E smoke test does not validate actual Step 3.1 execution path**
38
+ - **Location:** `tests/e2e/test_training_e2e.py:25-65`
39
+ - **Problem:** Test checks notebook text structure and helper filtering only; it does not instantiate trainer, run `trainer.train()`, or verify metrics/comparison outputs as specified.
40
+ - **Impact:** Regressions in training flow can pass CI undetected.
41
+ - **Fix:** Add a true smoke execution test (tiny/mocked model + single train step + metric assertion), aligned to `specs/F006-VERIFICATION_SPEC.md` Section 4.
42
+
43
+ 2. **Comparison cell is not random-vs-trained and does not capture pre-training baseline**
44
+ - **Location:** `notebooks/train_grpo.ipynb:181-183`
45
+ - **Problem:** Both `before_rollouts` and `after_rollouts` use `rollout_func` with the same model after training.
46
+ - **Impact:** Fails the feature’s “before vs after” demo intent (and spec’s random-vs-trained comparison).
47
+ - **Fix:** Capture baseline episodes before training (or explicit random policy), then run trained-policy episodes after `trainer.train()`.
48
+
49
+ ### Minor
50
+ None.
51
+
52
+ ## Next Actions
53
+
54
+ 1. Fix dependency compatibility (TRL/Torch) and prove imports succeed in clean env.
55
+ 2. Upgrade E2E smoke test to execute one real/mocked GRPO training step and assert logged metrics.
56
+ 3. Correct notebook comparison to true baseline-vs-trained behavior.
57
+ 4. Re-run: `uv run --with pytest pytest tests/e2e/test_training_e2e.py -v` and include import-check evidence.
__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLEnv: Interactive Database Query Environment for the OpenEnv Challenge."""
2
+
3
+ # ---------------------------------------------------------------------------
4
+ # Pydantic / TypedDict compatibility shim
5
+ # ---------------------------------------------------------------------------
6
+ # The openenv library defines ``Message`` with ``typing.TypedDict``.
7
+ # On Python < 3.12, Pydantic 2.x rejects ``typing.TypedDict`` in model
8
+ # fields; it requires ``typing_extensions.TypedDict`` instead. We patch
9
+ # ``typing.TypedDict`` early so that all downstream imports see the
10
+ # compatible version before any Pydantic model is constructed.
11
+ import sys
12
+
13
+ if sys.version_info < (3, 12):
14
+ import typing
15
+ import typing_extensions
16
+
17
+ typing.TypedDict = typing_extensions.TypedDict # type: ignore[attr-defined]
18
+
19
+ try:
20
+ from .models import SQLAction, SQLObservation, SQLState
21
+ except ImportError:
22
+ # When pytest imports this file standalone (not as part of the sql_env
23
+ # package), relative imports fail. Fall back to absolute imports.
24
+ try:
25
+ from sql_env.models import SQLAction, SQLObservation, SQLState # type: ignore[no-redef]
26
+ except ImportError:
27
+ pass # Imports not available; this file is being collected, not used.
28
+
29
+ # Client is not imported at package level to avoid loading torch unnecessarily.
30
+ # Import it explicitly when needed: from sql_env.client import SQLEnvClient
31
+
32
+ __all__ = [
33
+ "SQLAction",
34
+ "SQLObservation",
35
+ "SQLState",
36
+ ]
client.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, Iterable
2
+
3
+ import torch
4
+ from openenv.core.client_types import StepResult
5
+
6
+ from openenv.core.env_server.interfaces import Message
7
+ from openenv.core.env_client import EnvClient
8
+
9
+ from .models import SQLAction, SQLObservation, SQLState
10
+
11
+
12
class SQLEnvClient(EnvClient[SQLAction, SQLObservation, SQLState]):
    """Client for interacting with the SQLEnv environment server.

    Translates between chat-style ``Message`` objects / server payloads and
    the typed ``SQLAction`` / ``SQLObservation`` / ``SQLState`` models.
    """

    def _step_payload(self, action: SQLAction) -> Dict[str, Any]:
        """Convert a SQLAction into the payload for the step endpoint."""
        return {
            "action_type": action.action_type,
            "argument": action.argument,
            "metadata": action.metadata,
        }

    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SQLObservation]:
        """Parse the response from the step endpoint into a StepResult."""

        # The observation may be nested under "observation" or the payload may
        # be flat; fall back to the payload itself when no nested dict exists.
        obs_data = payload.get("observation")
        if not isinstance(obs_data, dict):
            obs_data = payload

        # Prefer top-level done/reward keys; fall back to the observation dict.
        done = payload.get("done", obs_data.get("done", False))
        reward = payload.get("reward", obs_data.get("reward"))

        # Coerce each field defensively so a missing or mistyped key cannot
        # raise during deserialization.
        observation = SQLObservation(
            question=str(obs_data.get("question", "")),
            schema_info=str(obs_data.get("schema_info", "")),
            result=str(obs_data.get("result", "")),
            error=str(obs_data.get("error", "")),
            step_count=int(obs_data.get("step_count", 0)),
            budget_remaining=int(obs_data.get("budget_remaining", 0)),
            action_history=list(obs_data.get("action_history", [])),
            done=bool(done),
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(
            observation=observation,
            reward=reward,
            done=bool(done),
        )

    def _parse_state(self, payload: Dict[str, Any]) -> SQLState:
        """Parse a state payload into a SQLState, rebuilding token tensors."""
        # Parse history messages
        history_messages = payload.get("history_messages", [])

        # Parse history tokens - convert lists back to tensors
        # (tokens travel over the wire as plain lists of ints).
        history_tokens_data = payload.get("history_tokens", [])
        history_tokens = []
        for token_list in history_tokens_data:
            if token_list:
                history_tokens.append(torch.tensor(token_list))
            else:
                # Preserve position with an empty tensor for empty entries.
                history_tokens.append(torch.tensor([]))

        return SQLState(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
            history_messages=history_messages,
            history_tokens=history_tokens,
            current_action_type=payload.get("current_action_type", "query"),
        )

    def _detect_action_type(self, message_content: str) -> str:
        """Detect the action type from user message content.

        Heuristic keyword matching; check order matters: an explicit
        ``answer `` prefix wins, then DESCRIBE keywords, then SAMPLE
        keywords, and anything else defaults to QUERY.
        """
        content_lower = message_content.lower()

        if content_lower.startswith("answer "):
            return "ANSWER"

        describe_keywords = [
            "describe",
            "schema",
            "columns",
            "structure",
            "what columns",
            "show columns",
        ]
        if any(keyword in content_lower for keyword in describe_keywords):
            return "DESCRIBE"

        sample_keywords = [
            "sample",
            "example",
            "rows",
            "data",
            "show me",
            "few rows",
            "how many",
        ]
        if any(keyword in content_lower for keyword in sample_keywords):
            return "SAMPLE"

        return "QUERY"

    def message_to_action(
        self,
        message: Message,
        tokenizer: Any,
        history_messages: Iterable[Message] | None = None,
    ) -> SQLAction:
        """Convert a user Message into a SQLAction.

        An explicit leading verb (DESCRIBE/SAMPLE/QUERY/ANSWER, any case)
        takes precedence; otherwise the keyword heuristic in
        ``_detect_action_type`` decides. Non-user or empty messages fall
        through as a QUERY with the raw content as argument.

        Raises
        ------
        ValueError
            If the message lacks a 'role' or 'content' key, or content is None.
        """
        if "role" not in message:
            raise ValueError("Message must contain a 'role' key")
        if "content" not in message:
            raise ValueError("Message must contain a 'content' key")
        if message["content"] is None:
            raise ValueError("Message content cannot be None")

        # Accepted for interface parity with EnvClient; unused here.
        _ = tokenizer
        _ = history_messages

        content = str(message["content"])
        parsed = content.strip()

        action_type = "QUERY"
        argument = content
        if message["role"].lower() == "user" and parsed:
            prefix, separator, remainder = parsed.partition(" ")
            normalized_prefix = prefix.upper()
            if normalized_prefix in {"DESCRIBE", "SAMPLE", "QUERY", "ANSWER"}:
                # Explicit verb: argument is everything after the verb
                # (empty when the message is the bare verb with no space).
                action_type = normalized_prefix
                argument = remainder if separator else ""
            else:
                action_type = self._detect_action_type(parsed)
                argument = parsed

        return SQLAction(
            action_type=action_type,
            argument=argument,
        )
conftest.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """Pytest configuration — exclude package __init__.py from collection."""
2
+
3
+ collect_ignore = ["__init__.py"]
data/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SQLEnv data package — databases and question sets."""
data/databases/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SQLAlchemy ORM models for SQLEnv databases."""
data/databases/models.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLAlchemy ORM models for the university course management database.
3
+
4
+ This module defines all tables using SQLAlchemy declarative syntax with proper
5
+ relationships and data types.
6
+ """
7
+
8
+ from datetime import datetime
9
+ from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
10
+ from sqlalchemy.orm import declarative_base, relationship
11
+
12
# Single declarative base shared by every ORM model defined in this module.
Base = declarative_base()
13
+
14
+
15
class Address(Base):
    """Address information for people.

    Linked to :class:`Person` many-to-many through the People_Addresses
    association table (see :class:`PersonAddress`).
    """

    __tablename__ = "Addresses"

    # Surrogate primary key.
    address_id = Column(Integer, primary_key=True, autoincrement=True)
    # First address line is the only mandatory component.
    line_1 = Column(String(255), nullable=False)
    line_2 = Column(String(255))
    city = Column(String(100))
    zip_postcode = Column(String(20))
    state_province_county = Column(String(100))
    country = Column(String(100))

    # Relationships
    # One address can be associated with many people over time.
    people_addresses = relationship("PersonAddress", back_populates="address")
30
+
31
+
32
class Person(Base):
    """Person information.

    Linked to :class:`Address` many-to-many through the People_Addresses
    association table (see :class:`PersonAddress`).
    """

    __tablename__ = "People"

    # Surrogate primary key.
    person_id = Column(Integer, primary_key=True, autoincrement=True)
    first_name = Column(String(100), nullable=False)
    middle_name = Column(String(100))
    last_name = Column(String(100), nullable=False)
    cell_mobile_number = Column(String(20))
    email_address = Column(String(255))
    # Login name must be unique across all people.
    login_name = Column(String(100), unique=True)
    # NOTE(review): stored as a plain string column — presumably mirrors the
    # source dataset schema rather than a real auth store; confirm before
    # storing any real credentials here.
    password = Column(String(255))

    # Relationships
    people_addresses = relationship("PersonAddress", back_populates="person")
48
+
49
+
50
class Student(Base):
    """Student information.

    Registrations and attendance are modeled as separate association
    tables, so a student can be registered without ever attending.
    """

    __tablename__ = "Students"

    # Surrogate primary key.
    student_id = Column(Integer, primary_key=True, autoincrement=True)
    # Free-form details blob; no structured columns in this schema.
    student_details = Column(String(500))

    # Relationships
    course_registrations = relationship(
        "StudentCourseRegistration", back_populates="student"
    )
    course_attendance = relationship(
        "StudentCourseAttendance", back_populates="student"
    )
65
+
66
+
67
class Course(Base):
    """Course information."""

    __tablename__ = "Courses"

    # Natural (string) primary key — course ids are codes, not integers.
    course_id = Column(String(50), primary_key=True)
    course_name = Column(String(200), nullable=False)
    course_description = Column(String(500))
    other_details = Column(String(500))

    # Relationships
    course_registrations = relationship(
        "StudentCourseRegistration", back_populates="course"
    )
    course_attendance = relationship("StudentCourseAttendance", back_populates="course")
82
+
83
+
84
class PersonAddress(Base):
    """Link between people and their addresses with date ranges.

    Association object for the People ⟷ Addresses many-to-many; the
    date_from/date_to pair records when each address was in effect.
    """

    __tablename__ = "People_Addresses"

    # Surrogate key; the (person_id, address_id) pair is NOT declared
    # unique, so repeated stints at the same address are representable.
    person_address_id = Column(Integer, primary_key=True, autoincrement=True)
    person_id = Column(Integer, ForeignKey("People.person_id"), nullable=False)
    address_id = Column(Integer, ForeignKey("Addresses.address_id"), nullable=False)
    date_from = Column(DateTime)
    date_to = Column(DateTime)

    # Relationships
    person = relationship("Person", back_populates="people_addresses")
    address = relationship("Address", back_populates="people_addresses")
98
+
99
+
100
class StudentCourseRegistration(Base):
    """Student registration for courses.

    Composite primary key (student_id, course_id): at most one
    registration per student per course.
    """

    __tablename__ = "Student_Course_Registrations"

    student_id = Column(Integer, ForeignKey("Students.student_id"), primary_key=True)
    course_id = Column(String(50), ForeignKey("Courses.course_id"), primary_key=True)
    # NOTE(review): `datetime.utcnow` is deprecated since Python 3.12 and
    # produces a naive datetime; consider a timezone-aware default — but
    # changing it alters stored values, so confirm with consumers first.
    registration_date = Column(DateTime, default=datetime.utcnow)

    # Relationships
    student = relationship("Student", back_populates="course_registrations")
    course = relationship("Course", back_populates="course_registrations")
112
+
113
+
114
class StudentCourseAttendance(Base):
    """Student attendance records for courses.

    Composite primary key includes the attendance date, so one row per
    (student, course, date) — multiple attendances per course are allowed.
    """

    __tablename__ = "Student_Course_Attendance"

    student_id = Column(Integer, ForeignKey("Students.student_id"), primary_key=True)
    course_id = Column(String(50), ForeignKey("Courses.course_id"), primary_key=True)
    date_of_attendance = Column(DateTime, primary_key=True)

    # Relationships
    student = relationship("Student", back_populates="course_attendance")
    course = relationship("Course", back_populates="course_attendance")
126
+
127
+
128
class Candidate(Base):
    """Candidate information."""

    __tablename__ = "Candidates"

    # Surrogate primary key.
    candidate_id = Column(Integer, primary_key=True, autoincrement=True)
    # Free-form details blob, mirroring Students.student_details.
    candidate_details = Column(String(500))

    # Relationships
    assessments = relationship("CandidateAssessment", back_populates="candidate")
138
+
139
+
140
class CandidateAssessment(Base):
    """Assessment records for candidates.

    Composite primary key (candidate_id, qualification, assessment_date):
    a candidate may be assessed for the same qualification on different
    dates.
    """

    __tablename__ = "Candidate_Assessments"

    candidate_id = Column(
        Integer, ForeignKey("Candidates.candidate_id"), primary_key=True
    )
    qualification = Column(String(200), primary_key=True)
    assessment_date = Column(DateTime, primary_key=True)
    # NOTE(review): "asessment" spelling appears to match the upstream
    # dataset's column name — do not rename without migrating the data;
    # confirm against the source database schema.
    asessment_outcome_code = Column(String(50))

    # Relationships
    candidate = relationship("Candidate", back_populates="assessments")
data/questions/db_list.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "student_assessment",
3
+ "concert_singer",
4
+ "world_1",
5
+ "car_1",
6
+ "employee_hire_evaluation",
7
+ "pets_1",
8
+ "cre_Doc_Template_Mgt",
9
+ "dog_kennels",
10
+ "flight_2",
11
+ "poker_player"
12
+ ]
data/questions/questions_eval.json ADDED
The diff for this file is too large to render. See raw diff
 
data/questions/questions_train.json ADDED
The diff for this file is too large to render. See raw diff
 
data/questions/student_assessment.json ADDED
@@ -0,0 +1,3355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "db_id": "student_assessment",
4
+ "query": "SELECT T1.course_name FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_Id GROUP BY T1.course_id ORDER BY count(*) DESC LIMIT 1",
5
+ "question": "which course has most number of registered students?",
6
+ "query_toks": [
7
+ "SELECT",
8
+ "T1.course_name",
9
+ "FROM",
10
+ "courses",
11
+ "AS",
12
+ "T1",
13
+ "JOIN",
14
+ "student_course_registrations",
15
+ "AS",
16
+ "T2",
17
+ "ON",
18
+ "T1.course_id",
19
+ "=",
20
+ "T2.course_Id",
21
+ "GROUP",
22
+ "BY",
23
+ "T1.course_id",
24
+ "ORDER",
25
+ "BY",
26
+ "count",
27
+ "(",
28
+ "*",
29
+ ")",
30
+ "DESC",
31
+ "LIMIT",
32
+ "1"
33
+ ],
34
+ "query_toks_no_value": [
35
+ "select",
36
+ "t1",
37
+ ".",
38
+ "course_name",
39
+ "from",
40
+ "courses",
41
+ "as",
42
+ "t1",
43
+ "join",
44
+ "student_course_registrations",
45
+ "as",
46
+ "t2",
47
+ "on",
48
+ "t1",
49
+ ".",
50
+ "course_id",
51
+ "=",
52
+ "t2",
53
+ ".",
54
+ "course_id",
55
+ "group",
56
+ "by",
57
+ "t1",
58
+ ".",
59
+ "course_id",
60
+ "order",
61
+ "by",
62
+ "count",
63
+ "(",
64
+ "*",
65
+ ")",
66
+ "desc",
67
+ "limit",
68
+ "value"
69
+ ],
70
+ "question_toks": [
71
+ "which",
72
+ "course",
73
+ "has",
74
+ "most",
75
+ "number",
76
+ "of",
77
+ "registered",
78
+ "students",
79
+ "?"
80
+ ]
81
+ },
82
+ {
83
+ "db_id": "student_assessment",
84
+ "query": "SELECT T1.course_name FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_Id GROUP BY T1.course_id ORDER BY count(*) DESC LIMIT 1",
85
+ "question": "What is the name of the course with the most registered students?",
86
+ "query_toks": [
87
+ "SELECT",
88
+ "T1.course_name",
89
+ "FROM",
90
+ "courses",
91
+ "AS",
92
+ "T1",
93
+ "JOIN",
94
+ "student_course_registrations",
95
+ "AS",
96
+ "T2",
97
+ "ON",
98
+ "T1.course_id",
99
+ "=",
100
+ "T2.course_Id",
101
+ "GROUP",
102
+ "BY",
103
+ "T1.course_id",
104
+ "ORDER",
105
+ "BY",
106
+ "count",
107
+ "(",
108
+ "*",
109
+ ")",
110
+ "DESC",
111
+ "LIMIT",
112
+ "1"
113
+ ],
114
+ "query_toks_no_value": [
115
+ "select",
116
+ "t1",
117
+ ".",
118
+ "course_name",
119
+ "from",
120
+ "courses",
121
+ "as",
122
+ "t1",
123
+ "join",
124
+ "student_course_registrations",
125
+ "as",
126
+ "t2",
127
+ "on",
128
+ "t1",
129
+ ".",
130
+ "course_id",
131
+ "=",
132
+ "t2",
133
+ ".",
134
+ "course_id",
135
+ "group",
136
+ "by",
137
+ "t1",
138
+ ".",
139
+ "course_id",
140
+ "order",
141
+ "by",
142
+ "count",
143
+ "(",
144
+ "*",
145
+ ")",
146
+ "desc",
147
+ "limit",
148
+ "value"
149
+ ],
150
+ "question_toks": [
151
+ "What",
152
+ "is",
153
+ "the",
154
+ "name",
155
+ "of",
156
+ "the",
157
+ "course",
158
+ "with",
159
+ "the",
160
+ "most",
161
+ "registered",
162
+ "students",
163
+ "?"
164
+ ]
165
+ },
166
+ {
167
+ "db_id": "student_assessment",
168
+ "query": "SELECT student_id FROM student_course_registrations GROUP BY student_id ORDER BY count(*) LIMIT 1",
169
+ "question": "what is id of students who registered some courses but the least number of courses in these students?",
170
+ "query_toks": [
171
+ "SELECT",
172
+ "student_id",
173
+ "FROM",
174
+ "student_course_registrations",
175
+ "GROUP",
176
+ "BY",
177
+ "student_id",
178
+ "ORDER",
179
+ "BY",
180
+ "count",
181
+ "(",
182
+ "*",
183
+ ")",
184
+ "LIMIT",
185
+ "1"
186
+ ],
187
+ "query_toks_no_value": [
188
+ "select",
189
+ "student_id",
190
+ "from",
191
+ "student_course_registrations",
192
+ "group",
193
+ "by",
194
+ "student_id",
195
+ "order",
196
+ "by",
197
+ "count",
198
+ "(",
199
+ "*",
200
+ ")",
201
+ "limit",
202
+ "value"
203
+ ],
204
+ "question_toks": [
205
+ "what",
206
+ "is",
207
+ "id",
208
+ "of",
209
+ "students",
210
+ "who",
211
+ "registered",
212
+ "some",
213
+ "courses",
214
+ "but",
215
+ "the",
216
+ "least",
217
+ "number",
218
+ "of",
219
+ "courses",
220
+ "in",
221
+ "these",
222
+ "students",
223
+ "?"
224
+ ]
225
+ },
226
+ {
227
+ "db_id": "student_assessment",
228
+ "query": "SELECT student_id FROM student_course_registrations GROUP BY student_id ORDER BY count(*) LIMIT 1",
229
+ "question": "What are the ids of the students who registered for some courses but had the least number of courses for all students?",
230
+ "query_toks": [
231
+ "SELECT",
232
+ "student_id",
233
+ "FROM",
234
+ "student_course_registrations",
235
+ "GROUP",
236
+ "BY",
237
+ "student_id",
238
+ "ORDER",
239
+ "BY",
240
+ "count",
241
+ "(",
242
+ "*",
243
+ ")",
244
+ "LIMIT",
245
+ "1"
246
+ ],
247
+ "query_toks_no_value": [
248
+ "select",
249
+ "student_id",
250
+ "from",
251
+ "student_course_registrations",
252
+ "group",
253
+ "by",
254
+ "student_id",
255
+ "order",
256
+ "by",
257
+ "count",
258
+ "(",
259
+ "*",
260
+ ")",
261
+ "limit",
262
+ "value"
263
+ ],
264
+ "question_toks": [
265
+ "What",
266
+ "are",
267
+ "the",
268
+ "ids",
269
+ "of",
270
+ "the",
271
+ "students",
272
+ "who",
273
+ "registered",
274
+ "for",
275
+ "some",
276
+ "courses",
277
+ "but",
278
+ "had",
279
+ "the",
280
+ "least",
281
+ "number",
282
+ "of",
283
+ "courses",
284
+ "for",
285
+ "all",
286
+ "students",
287
+ "?"
288
+ ]
289
+ },
290
+ {
291
+ "db_id": "student_assessment",
292
+ "query": "SELECT T2.first_name , T2.last_name FROM candidates AS T1 JOIN people AS T2 ON T1.candidate_id = T2.person_id",
293
+ "question": "what are the first name and last name of all candidates?",
294
+ "query_toks": [
295
+ "SELECT",
296
+ "T2.first_name",
297
+ ",",
298
+ "T2.last_name",
299
+ "FROM",
300
+ "candidates",
301
+ "AS",
302
+ "T1",
303
+ "JOIN",
304
+ "people",
305
+ "AS",
306
+ "T2",
307
+ "ON",
308
+ "T1.candidate_id",
309
+ "=",
310
+ "T2.person_id"
311
+ ],
312
+ "query_toks_no_value": [
313
+ "select",
314
+ "t2",
315
+ ".",
316
+ "first_name",
317
+ ",",
318
+ "t2",
319
+ ".",
320
+ "last_name",
321
+ "from",
322
+ "candidates",
323
+ "as",
324
+ "t1",
325
+ "join",
326
+ "people",
327
+ "as",
328
+ "t2",
329
+ "on",
330
+ "t1",
331
+ ".",
332
+ "candidate_id",
333
+ "=",
334
+ "t2",
335
+ ".",
336
+ "person_id"
337
+ ],
338
+ "question_toks": [
339
+ "what",
340
+ "are",
341
+ "the",
342
+ "first",
343
+ "name",
344
+ "and",
345
+ "last",
346
+ "name",
347
+ "of",
348
+ "all",
349
+ "candidates",
350
+ "?"
351
+ ]
352
+ },
353
+ {
354
+ "db_id": "student_assessment",
355
+ "query": "SELECT T2.first_name , T2.last_name FROM candidates AS T1 JOIN people AS T2 ON T1.candidate_id = T2.person_id",
356
+ "question": "What are the first and last names of all the candidates?",
357
+ "query_toks": [
358
+ "SELECT",
359
+ "T2.first_name",
360
+ ",",
361
+ "T2.last_name",
362
+ "FROM",
363
+ "candidates",
364
+ "AS",
365
+ "T1",
366
+ "JOIN",
367
+ "people",
368
+ "AS",
369
+ "T2",
370
+ "ON",
371
+ "T1.candidate_id",
372
+ "=",
373
+ "T2.person_id"
374
+ ],
375
+ "query_toks_no_value": [
376
+ "select",
377
+ "t2",
378
+ ".",
379
+ "first_name",
380
+ ",",
381
+ "t2",
382
+ ".",
383
+ "last_name",
384
+ "from",
385
+ "candidates",
386
+ "as",
387
+ "t1",
388
+ "join",
389
+ "people",
390
+ "as",
391
+ "t2",
392
+ "on",
393
+ "t1",
394
+ ".",
395
+ "candidate_id",
396
+ "=",
397
+ "t2",
398
+ ".",
399
+ "person_id"
400
+ ],
401
+ "question_toks": [
402
+ "What",
403
+ "are",
404
+ "the",
405
+ "first",
406
+ "and",
407
+ "last",
408
+ "names",
409
+ "of",
410
+ "all",
411
+ "the",
412
+ "candidates",
413
+ "?"
414
+ ]
415
+ },
416
+ {
417
+ "db_id": "student_assessment",
418
+ "query": "SELECT student_id FROM students WHERE student_id NOT IN (SELECT student_id FROM student_course_attendance)",
419
+ "question": "List the id of students who never attends courses?",
420
+ "query_toks": [
421
+ "SELECT",
422
+ "student_id",
423
+ "FROM",
424
+ "students",
425
+ "WHERE",
426
+ "student_id",
427
+ "NOT",
428
+ "IN",
429
+ "(",
430
+ "SELECT",
431
+ "student_id",
432
+ "FROM",
433
+ "student_course_attendance",
434
+ ")"
435
+ ],
436
+ "query_toks_no_value": [
437
+ "select",
438
+ "student_id",
439
+ "from",
440
+ "students",
441
+ "where",
442
+ "student_id",
443
+ "not",
444
+ "in",
445
+ "(",
446
+ "select",
447
+ "student_id",
448
+ "from",
449
+ "student_course_attendance",
450
+ ")"
451
+ ],
452
+ "question_toks": [
453
+ "List",
454
+ "the",
455
+ "id",
456
+ "of",
457
+ "students",
458
+ "who",
459
+ "never",
460
+ "attends",
461
+ "courses",
462
+ "?"
463
+ ]
464
+ },
465
+ {
466
+ "db_id": "student_assessment",
467
+ "query": "SELECT student_id FROM students WHERE student_id NOT IN (SELECT student_id FROM student_course_attendance)",
468
+ "question": "What are the ids of every student who has never attended a course?",
469
+ "query_toks": [
470
+ "SELECT",
471
+ "student_id",
472
+ "FROM",
473
+ "students",
474
+ "WHERE",
475
+ "student_id",
476
+ "NOT",
477
+ "IN",
478
+ "(",
479
+ "SELECT",
480
+ "student_id",
481
+ "FROM",
482
+ "student_course_attendance",
483
+ ")"
484
+ ],
485
+ "query_toks_no_value": [
486
+ "select",
487
+ "student_id",
488
+ "from",
489
+ "students",
490
+ "where",
491
+ "student_id",
492
+ "not",
493
+ "in",
494
+ "(",
495
+ "select",
496
+ "student_id",
497
+ "from",
498
+ "student_course_attendance",
499
+ ")"
500
+ ],
501
+ "question_toks": [
502
+ "What",
503
+ "are",
504
+ "the",
505
+ "ids",
506
+ "of",
507
+ "every",
508
+ "student",
509
+ "who",
510
+ "has",
511
+ "never",
512
+ "attended",
513
+ "a",
514
+ "course",
515
+ "?"
516
+ ]
517
+ },
518
+ {
519
+ "db_id": "student_assessment",
520
+ "query": "SELECT student_id FROM student_course_attendance",
521
+ "question": "List the id of students who attended some courses?",
522
+ "query_toks": [
523
+ "SELECT",
524
+ "student_id",
525
+ "FROM",
526
+ "student_course_attendance"
527
+ ],
528
+ "query_toks_no_value": [
529
+ "select",
530
+ "student_id",
531
+ "from",
532
+ "student_course_attendance"
533
+ ],
534
+ "question_toks": [
535
+ "List",
536
+ "the",
537
+ "id",
538
+ "of",
539
+ "students",
540
+ "who",
541
+ "attended",
542
+ "some",
543
+ "courses",
544
+ "?"
545
+ ]
546
+ },
547
+ {
548
+ "db_id": "student_assessment",
549
+ "query": "SELECT student_id FROM student_course_attendance",
550
+ "question": "What are the ids of all students who have attended at least one course?",
551
+ "query_toks": [
552
+ "SELECT",
553
+ "student_id",
554
+ "FROM",
555
+ "student_course_attendance"
556
+ ],
557
+ "query_toks_no_value": [
558
+ "select",
559
+ "student_id",
560
+ "from",
561
+ "student_course_attendance"
562
+ ],
563
+ "question_toks": [
564
+ "What",
565
+ "are",
566
+ "the",
567
+ "ids",
568
+ "of",
569
+ "all",
570
+ "students",
571
+ "who",
572
+ "have",
573
+ "attended",
574
+ "at",
575
+ "least",
576
+ "one",
577
+ "course",
578
+ "?"
579
+ ]
580
+ },
581
+ {
582
+ "db_id": "student_assessment",
583
+ "query": "SELECT T1.student_id , T2.course_name FROM student_course_registrations AS T1 JOIN courses AS T2 ON T1.course_id = T2.course_id",
584
+ "question": "What are the ids of all students for courses and what are the names of those courses?",
585
+ "query_toks": [
586
+ "SELECT",
587
+ "T1.student_id",
588
+ ",",
589
+ "T2.course_name",
590
+ "FROM",
591
+ "student_course_registrations",
592
+ "AS",
593
+ "T1",
594
+ "JOIN",
595
+ "courses",
596
+ "AS",
597
+ "T2",
598
+ "ON",
599
+ "T1.course_id",
600
+ "=",
601
+ "T2.course_id"
602
+ ],
603
+ "query_toks_no_value": [
604
+ "select",
605
+ "t1",
606
+ ".",
607
+ "student_id",
608
+ ",",
609
+ "t2",
610
+ ".",
611
+ "course_name",
612
+ "from",
613
+ "student_course_registrations",
614
+ "as",
615
+ "t1",
616
+ "join",
617
+ "courses",
618
+ "as",
619
+ "t2",
620
+ "on",
621
+ "t1",
622
+ ".",
623
+ "course_id",
624
+ "=",
625
+ "t2",
626
+ ".",
627
+ "course_id"
628
+ ],
629
+ "question_toks": [
630
+ "What",
631
+ "are",
632
+ "the",
633
+ "ids",
634
+ "of",
635
+ "all",
636
+ "students",
637
+ "for",
638
+ "courses",
639
+ "and",
640
+ "what",
641
+ "are",
642
+ "the",
643
+ "names",
644
+ "of",
645
+ "those",
646
+ "courses",
647
+ "?"
648
+ ]
649
+ },
650
+ {
651
+ "db_id": "student_assessment",
652
+ "query": "SELECT T2.student_details FROM student_course_registrations AS T1 JOIN students AS T2 ON T1.student_id = T2.student_id ORDER BY T1.registration_date DESC LIMIT 1",
653
+ "question": "What is detail of the student who most recently registered course?",
654
+ "query_toks": [
655
+ "SELECT",
656
+ "T2.student_details",
657
+ "FROM",
658
+ "student_course_registrations",
659
+ "AS",
660
+ "T1",
661
+ "JOIN",
662
+ "students",
663
+ "AS",
664
+ "T2",
665
+ "ON",
666
+ "T1.student_id",
667
+ "=",
668
+ "T2.student_id",
669
+ "ORDER",
670
+ "BY",
671
+ "T1.registration_date",
672
+ "DESC",
673
+ "LIMIT",
674
+ "1"
675
+ ],
676
+ "query_toks_no_value": [
677
+ "select",
678
+ "t2",
679
+ ".",
680
+ "student_details",
681
+ "from",
682
+ "student_course_registrations",
683
+ "as",
684
+ "t1",
685
+ "join",
686
+ "students",
687
+ "as",
688
+ "t2",
689
+ "on",
690
+ "t1",
691
+ ".",
692
+ "student_id",
693
+ "=",
694
+ "t2",
695
+ ".",
696
+ "student_id",
697
+ "order",
698
+ "by",
699
+ "t1",
700
+ ".",
701
+ "registration_date",
702
+ "desc",
703
+ "limit",
704
+ "value"
705
+ ],
706
+ "question_toks": [
707
+ "What",
708
+ "is",
709
+ "detail",
710
+ "of",
711
+ "the",
712
+ "student",
713
+ "who",
714
+ "most",
715
+ "recently",
716
+ "registered",
717
+ "course",
718
+ "?"
719
+ ]
720
+ },
721
+ {
722
+ "db_id": "student_assessment",
723
+ "query": "SELECT T2.student_details FROM student_course_registrations AS T1 JOIN students AS T2 ON T1.student_id = T2.student_id ORDER BY T1.registration_date DESC LIMIT 1",
724
+ "question": "What details do we have on the students who registered for courses most recently?",
725
+ "query_toks": [
726
+ "SELECT",
727
+ "T2.student_details",
728
+ "FROM",
729
+ "student_course_registrations",
730
+ "AS",
731
+ "T1",
732
+ "JOIN",
733
+ "students",
734
+ "AS",
735
+ "T2",
736
+ "ON",
737
+ "T1.student_id",
738
+ "=",
739
+ "T2.student_id",
740
+ "ORDER",
741
+ "BY",
742
+ "T1.registration_date",
743
+ "DESC",
744
+ "LIMIT",
745
+ "1"
746
+ ],
747
+ "query_toks_no_value": [
748
+ "select",
749
+ "t2",
750
+ ".",
751
+ "student_details",
752
+ "from",
753
+ "student_course_registrations",
754
+ "as",
755
+ "t1",
756
+ "join",
757
+ "students",
758
+ "as",
759
+ "t2",
760
+ "on",
761
+ "t1",
762
+ ".",
763
+ "student_id",
764
+ "=",
765
+ "t2",
766
+ ".",
767
+ "student_id",
768
+ "order",
769
+ "by",
770
+ "t1",
771
+ ".",
772
+ "registration_date",
773
+ "desc",
774
+ "limit",
775
+ "value"
776
+ ],
777
+ "question_toks": [
778
+ "What",
779
+ "details",
780
+ "do",
781
+ "we",
782
+ "have",
783
+ "on",
784
+ "the",
785
+ "students",
786
+ "who",
787
+ "registered",
788
+ "for",
789
+ "courses",
790
+ "most",
791
+ "recently",
792
+ "?"
793
+ ]
794
+ },
795
+ {
796
+ "db_id": "student_assessment",
797
+ "query": "SELECT count(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"English\"",
798
+ "question": "How many students attend course English?",
799
+ "query_toks": [
800
+ "SELECT",
801
+ "count",
802
+ "(",
803
+ "*",
804
+ ")",
805
+ "FROM",
806
+ "courses",
807
+ "AS",
808
+ "T1",
809
+ "JOIN",
810
+ "student_course_attendance",
811
+ "AS",
812
+ "T2",
813
+ "ON",
814
+ "T1.course_id",
815
+ "=",
816
+ "T2.course_id",
817
+ "WHERE",
818
+ "T1.course_name",
819
+ "=",
820
+ "``",
821
+ "English",
822
+ "''"
823
+ ],
824
+ "query_toks_no_value": [
825
+ "select",
826
+ "count",
827
+ "(",
828
+ "*",
829
+ ")",
830
+ "from",
831
+ "courses",
832
+ "as",
833
+ "t1",
834
+ "join",
835
+ "student_course_attendance",
836
+ "as",
837
+ "t2",
838
+ "on",
839
+ "t1",
840
+ ".",
841
+ "course_id",
842
+ "=",
843
+ "t2",
844
+ ".",
845
+ "course_id",
846
+ "where",
847
+ "t1",
848
+ ".",
849
+ "course_name",
850
+ "=",
851
+ "value"
852
+ ],
853
+ "question_toks": [
854
+ "How",
855
+ "many",
856
+ "students",
857
+ "attend",
858
+ "course",
859
+ "English",
860
+ "?"
861
+ ]
862
+ },
863
+ {
864
+ "db_id": "student_assessment",
865
+ "query": "SELECT count(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"English\"",
866
+ "question": "How many students are attending English courses?",
867
+ "query_toks": [
868
+ "SELECT",
869
+ "count",
870
+ "(",
871
+ "*",
872
+ ")",
873
+ "FROM",
874
+ "courses",
875
+ "AS",
876
+ "T1",
877
+ "JOIN",
878
+ "student_course_attendance",
879
+ "AS",
880
+ "T2",
881
+ "ON",
882
+ "T1.course_id",
883
+ "=",
884
+ "T2.course_id",
885
+ "WHERE",
886
+ "T1.course_name",
887
+ "=",
888
+ "``",
889
+ "English",
890
+ "''"
891
+ ],
892
+ "query_toks_no_value": [
893
+ "select",
894
+ "count",
895
+ "(",
896
+ "*",
897
+ ")",
898
+ "from",
899
+ "courses",
900
+ "as",
901
+ "t1",
902
+ "join",
903
+ "student_course_attendance",
904
+ "as",
905
+ "t2",
906
+ "on",
907
+ "t1",
908
+ ".",
909
+ "course_id",
910
+ "=",
911
+ "t2",
912
+ ".",
913
+ "course_id",
914
+ "where",
915
+ "t1",
916
+ ".",
917
+ "course_name",
918
+ "=",
919
+ "value"
920
+ ],
921
+ "question_toks": [
922
+ "How",
923
+ "many",
924
+ "students",
925
+ "are",
926
+ "attending",
927
+ "English",
928
+ "courses",
929
+ "?"
930
+ ]
931
+ },
932
+ {
933
+ "db_id": "student_assessment",
934
+ "query": "SELECT count(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T2.student_id = 171",
935
+ "question": "How many courses do the student whose id is 171 attend?",
936
+ "query_toks": [
937
+ "SELECT",
938
+ "count",
939
+ "(",
940
+ "*",
941
+ ")",
942
+ "FROM",
943
+ "courses",
944
+ "AS",
945
+ "T1",
946
+ "JOIN",
947
+ "student_course_attendance",
948
+ "AS",
949
+ "T2",
950
+ "ON",
951
+ "T1.course_id",
952
+ "=",
953
+ "T2.course_id",
954
+ "WHERE",
955
+ "T2.student_id",
956
+ "=",
957
+ "171"
958
+ ],
959
+ "query_toks_no_value": [
960
+ "select",
961
+ "count",
962
+ "(",
963
+ "*",
964
+ ")",
965
+ "from",
966
+ "courses",
967
+ "as",
968
+ "t1",
969
+ "join",
970
+ "student_course_attendance",
971
+ "as",
972
+ "t2",
973
+ "on",
974
+ "t1",
975
+ ".",
976
+ "course_id",
977
+ "=",
978
+ "t2",
979
+ ".",
980
+ "course_id",
981
+ "where",
982
+ "t2",
983
+ ".",
984
+ "student_id",
985
+ "=",
986
+ "value"
987
+ ],
988
+ "question_toks": [
989
+ "How",
990
+ "many",
991
+ "courses",
992
+ "do",
993
+ "the",
994
+ "student",
995
+ "whose",
996
+ "id",
997
+ "is",
998
+ "171",
999
+ "attend",
1000
+ "?"
1001
+ ]
1002
+ },
1003
+ {
1004
+ "db_id": "student_assessment",
1005
+ "query": "SELECT count(*) FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T2.student_id = 171",
1006
+ "question": "How many courses does the student with id 171 actually attend?",
1007
+ "query_toks": [
1008
+ "SELECT",
1009
+ "count",
1010
+ "(",
1011
+ "*",
1012
+ ")",
1013
+ "FROM",
1014
+ "courses",
1015
+ "AS",
1016
+ "T1",
1017
+ "JOIN",
1018
+ "student_course_attendance",
1019
+ "AS",
1020
+ "T2",
1021
+ "ON",
1022
+ "T1.course_id",
1023
+ "=",
1024
+ "T2.course_id",
1025
+ "WHERE",
1026
+ "T2.student_id",
1027
+ "=",
1028
+ "171"
1029
+ ],
1030
+ "query_toks_no_value": [
1031
+ "select",
1032
+ "count",
1033
+ "(",
1034
+ "*",
1035
+ ")",
1036
+ "from",
1037
+ "courses",
1038
+ "as",
1039
+ "t1",
1040
+ "join",
1041
+ "student_course_attendance",
1042
+ "as",
1043
+ "t2",
1044
+ "on",
1045
+ "t1",
1046
+ ".",
1047
+ "course_id",
1048
+ "=",
1049
+ "t2",
1050
+ ".",
1051
+ "course_id",
1052
+ "where",
1053
+ "t2",
1054
+ ".",
1055
+ "student_id",
1056
+ "=",
1057
+ "value"
1058
+ ],
1059
+ "question_toks": [
1060
+ "How",
1061
+ "many",
1062
+ "courses",
1063
+ "does",
1064
+ "the",
1065
+ "student",
1066
+ "with",
1067
+ "id",
1068
+ "171",
1069
+ "actually",
1070
+ "attend",
1071
+ "?"
1072
+ ]
1073
+ },
1074
+ {
1075
+ "db_id": "student_assessment",
1076
+ "query": "SELECT T2.candidate_id FROM people AS T1 JOIN candidates AS T2 ON T1.person_id = T2.candidate_id WHERE T1.email_address = \"stanley.monahan@example.org\"",
1077
+ "question": "Find id of the candidate whose email is stanley.monahan@example.org?",
1078
+ "query_toks": [
1079
+ "SELECT",
1080
+ "T2.candidate_id",
1081
+ "FROM",
1082
+ "people",
1083
+ "AS",
1084
+ "T1",
1085
+ "JOIN",
1086
+ "candidates",
1087
+ "AS",
1088
+ "T2",
1089
+ "ON",
1090
+ "T1.person_id",
1091
+ "=",
1092
+ "T2.candidate_id",
1093
+ "WHERE",
1094
+ "T1.email_address",
1095
+ "=",
1096
+ "``",
1097
+ "stanley.monahan",
1098
+ "@",
1099
+ "example.org",
1100
+ "''"
1101
+ ],
1102
+ "query_toks_no_value": [
1103
+ "select",
1104
+ "t2",
1105
+ ".",
1106
+ "candidate_id",
1107
+ "from",
1108
+ "people",
1109
+ "as",
1110
+ "t1",
1111
+ "join",
1112
+ "candidates",
1113
+ "as",
1114
+ "t2",
1115
+ "on",
1116
+ "t1",
1117
+ ".",
1118
+ "person_id",
1119
+ "=",
1120
+ "t2",
1121
+ ".",
1122
+ "candidate_id",
1123
+ "where",
1124
+ "t1",
1125
+ ".",
1126
+ "email_address",
1127
+ "=",
1128
+ "value"
1129
+ ],
1130
+ "question_toks": [
1131
+ "Find",
1132
+ "id",
1133
+ "of",
1134
+ "the",
1135
+ "candidate",
1136
+ "whose",
1137
+ "email",
1138
+ "is",
1139
+ "stanley.monahan",
1140
+ "@",
1141
+ "example.org",
1142
+ "?"
1143
+ ]
1144
+ },
1145
+ {
1146
+ "db_id": "student_assessment",
1147
+ "query": "SELECT T2.candidate_id FROM people AS T1 JOIN candidates AS T2 ON T1.person_id = T2.candidate_id WHERE T1.email_address = \"stanley.monahan@example.org\"",
1148
+ "question": "What is the id of the candidate whose email is stanley.monahan@example.org?",
1149
+ "query_toks": [
1150
+ "SELECT",
1151
+ "T2.candidate_id",
1152
+ "FROM",
1153
+ "people",
1154
+ "AS",
1155
+ "T1",
1156
+ "JOIN",
1157
+ "candidates",
1158
+ "AS",
1159
+ "T2",
1160
+ "ON",
1161
+ "T1.person_id",
1162
+ "=",
1163
+ "T2.candidate_id",
1164
+ "WHERE",
1165
+ "T1.email_address",
1166
+ "=",
1167
+ "``",
1168
+ "stanley.monahan",
1169
+ "@",
1170
+ "example.org",
1171
+ "''"
1172
+ ],
1173
+ "query_toks_no_value": [
1174
+ "select",
1175
+ "t2",
1176
+ ".",
1177
+ "candidate_id",
1178
+ "from",
1179
+ "people",
1180
+ "as",
1181
+ "t1",
1182
+ "join",
1183
+ "candidates",
1184
+ "as",
1185
+ "t2",
1186
+ "on",
1187
+ "t1",
1188
+ ".",
1189
+ "person_id",
1190
+ "=",
1191
+ "t2",
1192
+ ".",
1193
+ "candidate_id",
1194
+ "where",
1195
+ "t1",
1196
+ ".",
1197
+ "email_address",
1198
+ "=",
1199
+ "value"
1200
+ ],
1201
+ "question_toks": [
1202
+ "What",
1203
+ "is",
1204
+ "the",
1205
+ "id",
1206
+ "of",
1207
+ "the",
1208
+ "candidate",
1209
+ "whose",
1210
+ "email",
1211
+ "is",
1212
+ "stanley.monahan",
1213
+ "@",
1214
+ "example.org",
1215
+ "?"
1216
+ ]
1217
+ },
1218
+ {
1219
+ "db_id": "student_assessment",
1220
+ "query": "SELECT candidate_id FROM candidate_assessments ORDER BY assessment_date DESC LIMIT 1",
1221
+ "question": "Find id of the candidate who most recently accessed the course?",
1222
+ "query_toks": [
1223
+ "SELECT",
1224
+ "candidate_id",
1225
+ "FROM",
1226
+ "candidate_assessments",
1227
+ "ORDER",
1228
+ "BY",
1229
+ "assessment_date",
1230
+ "DESC",
1231
+ "LIMIT",
1232
+ "1"
1233
+ ],
1234
+ "query_toks_no_value": [
1235
+ "select",
1236
+ "candidate_id",
1237
+ "from",
1238
+ "candidate_assessments",
1239
+ "order",
1240
+ "by",
1241
+ "assessment_date",
1242
+ "desc",
1243
+ "limit",
1244
+ "value"
1245
+ ],
1246
+ "question_toks": [
1247
+ "Find",
1248
+ "id",
1249
+ "of",
1250
+ "the",
1251
+ "candidate",
1252
+ "who",
1253
+ "most",
1254
+ "recently",
1255
+ "accessed",
1256
+ "the",
1257
+ "course",
1258
+ "?"
1259
+ ]
1260
+ },
1261
+ {
1262
+ "db_id": "student_assessment",
1263
+ "query": "SELECT candidate_id FROM candidate_assessments ORDER BY assessment_date DESC LIMIT 1",
1264
+ "question": "What is the id of the candidate who most recently accessed the course?",
1265
+ "query_toks": [
1266
+ "SELECT",
1267
+ "candidate_id",
1268
+ "FROM",
1269
+ "candidate_assessments",
1270
+ "ORDER",
1271
+ "BY",
1272
+ "assessment_date",
1273
+ "DESC",
1274
+ "LIMIT",
1275
+ "1"
1276
+ ],
1277
+ "query_toks_no_value": [
1278
+ "select",
1279
+ "candidate_id",
1280
+ "from",
1281
+ "candidate_assessments",
1282
+ "order",
1283
+ "by",
1284
+ "assessment_date",
1285
+ "desc",
1286
+ "limit",
1287
+ "value"
1288
+ ],
1289
+ "question_toks": [
1290
+ "What",
1291
+ "is",
1292
+ "the",
1293
+ "id",
1294
+ "of",
1295
+ "the",
1296
+ "candidate",
1297
+ "who",
1298
+ "most",
1299
+ "recently",
1300
+ "accessed",
1301
+ "the",
1302
+ "course",
1303
+ "?"
1304
+ ]
1305
+ },
1306
+ {
1307
+ "db_id": "student_assessment",
1308
+ "query": "SELECT T1.student_details FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id ORDER BY count(*) DESC LIMIT 1",
1309
+ "question": "What is detail of the student who registered the most number of courses?",
1310
+ "query_toks": [
1311
+ "SELECT",
1312
+ "T1.student_details",
1313
+ "FROM",
1314
+ "students",
1315
+ "AS",
1316
+ "T1",
1317
+ "JOIN",
1318
+ "student_course_registrations",
1319
+ "AS",
1320
+ "T2",
1321
+ "ON",
1322
+ "T1.student_id",
1323
+ "=",
1324
+ "T2.student_id",
1325
+ "GROUP",
1326
+ "BY",
1327
+ "T1.student_id",
1328
+ "ORDER",
1329
+ "BY",
1330
+ "count",
1331
+ "(",
1332
+ "*",
1333
+ ")",
1334
+ "DESC",
1335
+ "LIMIT",
1336
+ "1"
1337
+ ],
1338
+ "query_toks_no_value": [
1339
+ "select",
1340
+ "t1",
1341
+ ".",
1342
+ "student_details",
1343
+ "from",
1344
+ "students",
1345
+ "as",
1346
+ "t1",
1347
+ "join",
1348
+ "student_course_registrations",
1349
+ "as",
1350
+ "t2",
1351
+ "on",
1352
+ "t1",
1353
+ ".",
1354
+ "student_id",
1355
+ "=",
1356
+ "t2",
1357
+ ".",
1358
+ "student_id",
1359
+ "group",
1360
+ "by",
1361
+ "t1",
1362
+ ".",
1363
+ "student_id",
1364
+ "order",
1365
+ "by",
1366
+ "count",
1367
+ "(",
1368
+ "*",
1369
+ ")",
1370
+ "desc",
1371
+ "limit",
1372
+ "value"
1373
+ ],
1374
+ "question_toks": [
1375
+ "What",
1376
+ "is",
1377
+ "detail",
1378
+ "of",
1379
+ "the",
1380
+ "student",
1381
+ "who",
1382
+ "registered",
1383
+ "the",
1384
+ "most",
1385
+ "number",
1386
+ "of",
1387
+ "courses",
1388
+ "?"
1389
+ ]
1390
+ },
1391
+ {
1392
+ "db_id": "student_assessment",
1393
+ "query": "SELECT T1.student_details FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id ORDER BY count(*) DESC LIMIT 1",
1394
+ "question": "What are the details of the student who registered for the most number of courses?",
1395
+ "query_toks": [
1396
+ "SELECT",
1397
+ "T1.student_details",
1398
+ "FROM",
1399
+ "students",
1400
+ "AS",
1401
+ "T1",
1402
+ "JOIN",
1403
+ "student_course_registrations",
1404
+ "AS",
1405
+ "T2",
1406
+ "ON",
1407
+ "T1.student_id",
1408
+ "=",
1409
+ "T2.student_id",
1410
+ "GROUP",
1411
+ "BY",
1412
+ "T1.student_id",
1413
+ "ORDER",
1414
+ "BY",
1415
+ "count",
1416
+ "(",
1417
+ "*",
1418
+ ")",
1419
+ "DESC",
1420
+ "LIMIT",
1421
+ "1"
1422
+ ],
1423
+ "query_toks_no_value": [
1424
+ "select",
1425
+ "t1",
1426
+ ".",
1427
+ "student_details",
1428
+ "from",
1429
+ "students",
1430
+ "as",
1431
+ "t1",
1432
+ "join",
1433
+ "student_course_registrations",
1434
+ "as",
1435
+ "t2",
1436
+ "on",
1437
+ "t1",
1438
+ ".",
1439
+ "student_id",
1440
+ "=",
1441
+ "t2",
1442
+ ".",
1443
+ "student_id",
1444
+ "group",
1445
+ "by",
1446
+ "t1",
1447
+ ".",
1448
+ "student_id",
1449
+ "order",
1450
+ "by",
1451
+ "count",
1452
+ "(",
1453
+ "*",
1454
+ ")",
1455
+ "desc",
1456
+ "limit",
1457
+ "value"
1458
+ ],
1459
+ "question_toks": [
1460
+ "What",
1461
+ "are",
1462
+ "the",
1463
+ "details",
1464
+ "of",
1465
+ "the",
1466
+ "student",
1467
+ "who",
1468
+ "registered",
1469
+ "for",
1470
+ "the",
1471
+ "most",
1472
+ "number",
1473
+ "of",
1474
+ "courses",
1475
+ "?"
1476
+ ]
1477
+ },
1478
+ {
1479
+ "db_id": "student_assessment",
1480
+ "query": "SELECT T1.student_id , count(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id",
1481
+ "question": "List the id of students who registered some courses and the number of their registered courses?",
1482
+ "query_toks": [
1483
+ "SELECT",
1484
+ "T1.student_id",
1485
+ ",",
1486
+ "count",
1487
+ "(",
1488
+ "*",
1489
+ ")",
1490
+ "FROM",
1491
+ "students",
1492
+ "AS",
1493
+ "T1",
1494
+ "JOIN",
1495
+ "student_course_registrations",
1496
+ "AS",
1497
+ "T2",
1498
+ "ON",
1499
+ "T1.student_id",
1500
+ "=",
1501
+ "T2.student_id",
1502
+ "GROUP",
1503
+ "BY",
1504
+ "T1.student_id"
1505
+ ],
1506
+ "query_toks_no_value": [
1507
+ "select",
1508
+ "t1",
1509
+ ".",
1510
+ "student_id",
1511
+ ",",
1512
+ "count",
1513
+ "(",
1514
+ "*",
1515
+ ")",
1516
+ "from",
1517
+ "students",
1518
+ "as",
1519
+ "t1",
1520
+ "join",
1521
+ "student_course_registrations",
1522
+ "as",
1523
+ "t2",
1524
+ "on",
1525
+ "t1",
1526
+ ".",
1527
+ "student_id",
1528
+ "=",
1529
+ "t2",
1530
+ ".",
1531
+ "student_id",
1532
+ "group",
1533
+ "by",
1534
+ "t1",
1535
+ ".",
1536
+ "student_id"
1537
+ ],
1538
+ "question_toks": [
1539
+ "List",
1540
+ "the",
1541
+ "id",
1542
+ "of",
1543
+ "students",
1544
+ "who",
1545
+ "registered",
1546
+ "some",
1547
+ "courses",
1548
+ "and",
1549
+ "the",
1550
+ "number",
1551
+ "of",
1552
+ "their",
1553
+ "registered",
1554
+ "courses",
1555
+ "?"
1556
+ ]
1557
+ },
1558
+ {
1559
+ "db_id": "student_assessment",
1560
+ "query": "SELECT T1.student_id , count(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id",
1561
+ "question": "For every student who is registered for some course, how many courses are they registered for?",
1562
+ "query_toks": [
1563
+ "SELECT",
1564
+ "T1.student_id",
1565
+ ",",
1566
+ "count",
1567
+ "(",
1568
+ "*",
1569
+ ")",
1570
+ "FROM",
1571
+ "students",
1572
+ "AS",
1573
+ "T1",
1574
+ "JOIN",
1575
+ "student_course_registrations",
1576
+ "AS",
1577
+ "T2",
1578
+ "ON",
1579
+ "T1.student_id",
1580
+ "=",
1581
+ "T2.student_id",
1582
+ "GROUP",
1583
+ "BY",
1584
+ "T1.student_id"
1585
+ ],
1586
+ "query_toks_no_value": [
1587
+ "select",
1588
+ "t1",
1589
+ ".",
1590
+ "student_id",
1591
+ ",",
1592
+ "count",
1593
+ "(",
1594
+ "*",
1595
+ ")",
1596
+ "from",
1597
+ "students",
1598
+ "as",
1599
+ "t1",
1600
+ "join",
1601
+ "student_course_registrations",
1602
+ "as",
1603
+ "t2",
1604
+ "on",
1605
+ "t1",
1606
+ ".",
1607
+ "student_id",
1608
+ "=",
1609
+ "t2",
1610
+ ".",
1611
+ "student_id",
1612
+ "group",
1613
+ "by",
1614
+ "t1",
1615
+ ".",
1616
+ "student_id"
1617
+ ],
1618
+ "question_toks": [
1619
+ "For",
1620
+ "every",
1621
+ "student",
1622
+ "who",
1623
+ "is",
1624
+ "registered",
1625
+ "for",
1626
+ "some",
1627
+ "course",
1628
+ ",",
1629
+ "how",
1630
+ "many",
1631
+ "courses",
1632
+ "are",
1633
+ "they",
1634
+ "registered",
1635
+ "for",
1636
+ "?"
1637
+ ]
1638
+ },
1639
+ {
1640
+ "db_id": "student_assessment",
1641
+ "query": "SELECT T3.course_name , count(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id JOIN courses AS T3 ON T2.course_id = T3.course_id GROUP BY T2.course_id",
1642
+ "question": "How many registed students do each course have? List course name and the number of their registered students?",
1643
+ "query_toks": [
1644
+ "SELECT",
1645
+ "T3.course_name",
1646
+ ",",
1647
+ "count",
1648
+ "(",
1649
+ "*",
1650
+ ")",
1651
+ "FROM",
1652
+ "students",
1653
+ "AS",
1654
+ "T1",
1655
+ "JOIN",
1656
+ "student_course_registrations",
1657
+ "AS",
1658
+ "T2",
1659
+ "ON",
1660
+ "T1.student_id",
1661
+ "=",
1662
+ "T2.student_id",
1663
+ "JOIN",
1664
+ "courses",
1665
+ "AS",
1666
+ "T3",
1667
+ "ON",
1668
+ "T2.course_id",
1669
+ "=",
1670
+ "T3.course_id",
1671
+ "GROUP",
1672
+ "BY",
1673
+ "T2.course_id"
1674
+ ],
1675
+ "query_toks_no_value": [
1676
+ "select",
1677
+ "t3",
1678
+ ".",
1679
+ "course_name",
1680
+ ",",
1681
+ "count",
1682
+ "(",
1683
+ "*",
1684
+ ")",
1685
+ "from",
1686
+ "students",
1687
+ "as",
1688
+ "t1",
1689
+ "join",
1690
+ "student_course_registrations",
1691
+ "as",
1692
+ "t2",
1693
+ "on",
1694
+ "t1",
1695
+ ".",
1696
+ "student_id",
1697
+ "=",
1698
+ "t2",
1699
+ ".",
1700
+ "student_id",
1701
+ "join",
1702
+ "courses",
1703
+ "as",
1704
+ "t3",
1705
+ "on",
1706
+ "t2",
1707
+ ".",
1708
+ "course_id",
1709
+ "=",
1710
+ "t3",
1711
+ ".",
1712
+ "course_id",
1713
+ "group",
1714
+ "by",
1715
+ "t2",
1716
+ ".",
1717
+ "course_id"
1718
+ ],
1719
+ "question_toks": [
1720
+ "How",
1721
+ "many",
1722
+ "registed",
1723
+ "students",
1724
+ "do",
1725
+ "each",
1726
+ "course",
1727
+ "have",
1728
+ "?",
1729
+ "List",
1730
+ "course",
1731
+ "name",
1732
+ "and",
1733
+ "the",
1734
+ "number",
1735
+ "of",
1736
+ "their",
1737
+ "registered",
1738
+ "students",
1739
+ "?"
1740
+ ]
1741
+ },
1742
+ {
1743
+ "db_id": "student_assessment",
1744
+ "query": "SELECT T3.course_name , count(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id JOIN courses AS T3 ON T2.course_id = T3.course_id GROUP BY T2.course_id",
1745
+ "question": "For each course id, how many students are registered and what are the course names?",
1746
+ "query_toks": [
1747
+ "SELECT",
1748
+ "T3.course_name",
1749
+ ",",
1750
+ "count",
1751
+ "(",
1752
+ "*",
1753
+ ")",
1754
+ "FROM",
1755
+ "students",
1756
+ "AS",
1757
+ "T1",
1758
+ "JOIN",
1759
+ "student_course_registrations",
1760
+ "AS",
1761
+ "T2",
1762
+ "ON",
1763
+ "T1.student_id",
1764
+ "=",
1765
+ "T2.student_id",
1766
+ "JOIN",
1767
+ "courses",
1768
+ "AS",
1769
+ "T3",
1770
+ "ON",
1771
+ "T2.course_id",
1772
+ "=",
1773
+ "T3.course_id",
1774
+ "GROUP",
1775
+ "BY",
1776
+ "T2.course_id"
1777
+ ],
1778
+ "query_toks_no_value": [
1779
+ "select",
1780
+ "t3",
1781
+ ".",
1782
+ "course_name",
1783
+ ",",
1784
+ "count",
1785
+ "(",
1786
+ "*",
1787
+ ")",
1788
+ "from",
1789
+ "students",
1790
+ "as",
1791
+ "t1",
1792
+ "join",
1793
+ "student_course_registrations",
1794
+ "as",
1795
+ "t2",
1796
+ "on",
1797
+ "t1",
1798
+ ".",
1799
+ "student_id",
1800
+ "=",
1801
+ "t2",
1802
+ ".",
1803
+ "student_id",
1804
+ "join",
1805
+ "courses",
1806
+ "as",
1807
+ "t3",
1808
+ "on",
1809
+ "t2",
1810
+ ".",
1811
+ "course_id",
1812
+ "=",
1813
+ "t3",
1814
+ ".",
1815
+ "course_id",
1816
+ "group",
1817
+ "by",
1818
+ "t2",
1819
+ ".",
1820
+ "course_id"
1821
+ ],
1822
+ "question_toks": [
1823
+ "For",
1824
+ "each",
1825
+ "course",
1826
+ "id",
1827
+ ",",
1828
+ "how",
1829
+ "many",
1830
+ "students",
1831
+ "are",
1832
+ "registered",
1833
+ "and",
1834
+ "what",
1835
+ "are",
1836
+ "the",
1837
+ "course",
1838
+ "names",
1839
+ "?"
1840
+ ]
1841
+ },
1842
+ {
1843
+ "db_id": "student_assessment",
1844
+ "query": "SELECT candidate_id FROM candidate_assessments WHERE asessment_outcome_code = \"Pass\"",
1845
+ "question": "Find id of candidates whose assessment code is \"Pass\"?",
1846
+ "query_toks": [
1847
+ "SELECT",
1848
+ "candidate_id",
1849
+ "FROM",
1850
+ "candidate_assessments",
1851
+ "WHERE",
1852
+ "asessment_outcome_code",
1853
+ "=",
1854
+ "``",
1855
+ "Pass",
1856
+ "''"
1857
+ ],
1858
+ "query_toks_no_value": [
1859
+ "select",
1860
+ "candidate_id",
1861
+ "from",
1862
+ "candidate_assessments",
1863
+ "where",
1864
+ "asessment_outcome_code",
1865
+ "=",
1866
+ "value"
1867
+ ],
1868
+ "question_toks": [
1869
+ "Find",
1870
+ "id",
1871
+ "of",
1872
+ "candidates",
1873
+ "whose",
1874
+ "assessment",
1875
+ "code",
1876
+ "is",
1877
+ "``",
1878
+ "Pass",
1879
+ "''",
1880
+ "?"
1881
+ ]
1882
+ },
1883
+ {
1884
+ "db_id": "student_assessment",
1885
+ "query": "SELECT candidate_id FROM candidate_assessments WHERE asessment_outcome_code = \"Pass\"",
1886
+ "question": "What are the ids of the candidates that have an outcome code of Pass?",
1887
+ "query_toks": [
1888
+ "SELECT",
1889
+ "candidate_id",
1890
+ "FROM",
1891
+ "candidate_assessments",
1892
+ "WHERE",
1893
+ "asessment_outcome_code",
1894
+ "=",
1895
+ "``",
1896
+ "Pass",
1897
+ "''"
1898
+ ],
1899
+ "query_toks_no_value": [
1900
+ "select",
1901
+ "candidate_id",
1902
+ "from",
1903
+ "candidate_assessments",
1904
+ "where",
1905
+ "asessment_outcome_code",
1906
+ "=",
1907
+ "value"
1908
+ ],
1909
+ "question_toks": [
1910
+ "What",
1911
+ "are",
1912
+ "the",
1913
+ "ids",
1914
+ "of",
1915
+ "the",
1916
+ "candidates",
1917
+ "that",
1918
+ "have",
1919
+ "an",
1920
+ "outcome",
1921
+ "code",
1922
+ "of",
1923
+ "Pass",
1924
+ "?"
1925
+ ]
1926
+ },
1927
+ {
1928
+ "db_id": "student_assessment",
1929
+ "query": "SELECT T3.cell_mobile_number FROM candidates AS T1 JOIN candidate_assessments AS T2 ON T1.candidate_id = T2.candidate_id JOIN people AS T3 ON T1.candidate_id = T3.person_id WHERE T2.asessment_outcome_code = \"Fail\"",
1930
+ "question": "Find the cell mobile number of the candidates whose assessment code is \"Fail\"?",
1931
+ "query_toks": [
1932
+ "SELECT",
1933
+ "T3.cell_mobile_number",
1934
+ "FROM",
1935
+ "candidates",
1936
+ "AS",
1937
+ "T1",
1938
+ "JOIN",
1939
+ "candidate_assessments",
1940
+ "AS",
1941
+ "T2",
1942
+ "ON",
1943
+ "T1.candidate_id",
1944
+ "=",
1945
+ "T2.candidate_id",
1946
+ "JOIN",
1947
+ "people",
1948
+ "AS",
1949
+ "T3",
1950
+ "ON",
1951
+ "T1.candidate_id",
1952
+ "=",
1953
+ "T3.person_id",
1954
+ "WHERE",
1955
+ "T2.asessment_outcome_code",
1956
+ "=",
1957
+ "``",
1958
+ "Fail",
1959
+ "''"
1960
+ ],
1961
+ "query_toks_no_value": [
1962
+ "select",
1963
+ "t3",
1964
+ ".",
1965
+ "cell_mobile_number",
1966
+ "from",
1967
+ "candidates",
1968
+ "as",
1969
+ "t1",
1970
+ "join",
1971
+ "candidate_assessments",
1972
+ "as",
1973
+ "t2",
1974
+ "on",
1975
+ "t1",
1976
+ ".",
1977
+ "candidate_id",
1978
+ "=",
1979
+ "t2",
1980
+ ".",
1981
+ "candidate_id",
1982
+ "join",
1983
+ "people",
1984
+ "as",
1985
+ "t3",
1986
+ "on",
1987
+ "t1",
1988
+ ".",
1989
+ "candidate_id",
1990
+ "=",
1991
+ "t3",
1992
+ ".",
1993
+ "person_id",
1994
+ "where",
1995
+ "t2",
1996
+ ".",
1997
+ "asessment_outcome_code",
1998
+ "=",
1999
+ "value"
2000
+ ],
2001
+ "question_toks": [
2002
+ "Find",
2003
+ "the",
2004
+ "cell",
2005
+ "mobile",
2006
+ "number",
2007
+ "of",
2008
+ "the",
2009
+ "candidates",
2010
+ "whose",
2011
+ "assessment",
2012
+ "code",
2013
+ "is",
2014
+ "``",
2015
+ "Fail",
2016
+ "''",
2017
+ "?"
2018
+ ]
2019
+ },
2020
+ {
2021
+ "db_id": "student_assessment",
2022
+ "query": "SELECT T3.cell_mobile_number FROM candidates AS T1 JOIN candidate_assessments AS T2 ON T1.candidate_id = T2.candidate_id JOIN people AS T3 ON T1.candidate_id = T3.person_id WHERE T2.asessment_outcome_code = \"Fail\"",
2023
+ "question": "What are the cell phone numbers of the candidates that received an assessment code of \"Fail\"?",
2024
+ "query_toks": [
2025
+ "SELECT",
2026
+ "T3.cell_mobile_number",
2027
+ "FROM",
2028
+ "candidates",
2029
+ "AS",
2030
+ "T1",
2031
+ "JOIN",
2032
+ "candidate_assessments",
2033
+ "AS",
2034
+ "T2",
2035
+ "ON",
2036
+ "T1.candidate_id",
2037
+ "=",
2038
+ "T2.candidate_id",
2039
+ "JOIN",
2040
+ "people",
2041
+ "AS",
2042
+ "T3",
2043
+ "ON",
2044
+ "T1.candidate_id",
2045
+ "=",
2046
+ "T3.person_id",
2047
+ "WHERE",
2048
+ "T2.asessment_outcome_code",
2049
+ "=",
2050
+ "``",
2051
+ "Fail",
2052
+ "''"
2053
+ ],
2054
+ "query_toks_no_value": [
2055
+ "select",
2056
+ "t3",
2057
+ ".",
2058
+ "cell_mobile_number",
2059
+ "from",
2060
+ "candidates",
2061
+ "as",
2062
+ "t1",
2063
+ "join",
2064
+ "candidate_assessments",
2065
+ "as",
2066
+ "t2",
2067
+ "on",
2068
+ "t1",
2069
+ ".",
2070
+ "candidate_id",
2071
+ "=",
2072
+ "t2",
2073
+ ".",
2074
+ "candidate_id",
2075
+ "join",
2076
+ "people",
2077
+ "as",
2078
+ "t3",
2079
+ "on",
2080
+ "t1",
2081
+ ".",
2082
+ "candidate_id",
2083
+ "=",
2084
+ "t3",
2085
+ ".",
2086
+ "person_id",
2087
+ "where",
2088
+ "t2",
2089
+ ".",
2090
+ "asessment_outcome_code",
2091
+ "=",
2092
+ "value"
2093
+ ],
2094
+ "question_toks": [
2095
+ "What",
2096
+ "are",
2097
+ "the",
2098
+ "cell",
2099
+ "phone",
2100
+ "numbers",
2101
+ "of",
2102
+ "the",
2103
+ "candidates",
2104
+ "that",
2105
+ "received",
2106
+ "an",
2107
+ "assessment",
2108
+ "code",
2109
+ "of",
2110
+ "``",
2111
+ "Fail",
2112
+ "''",
2113
+ "?"
2114
+ ]
2115
+ },
2116
+ {
2117
+ "db_id": "student_assessment",
2118
+ "query": "SELECT student_id FROM student_course_attendance WHERE course_id = 301",
2119
+ "question": "What are the id of students who registered course 301?",
2120
+ "query_toks": [
2121
+ "SELECT",
2122
+ "student_id",
2123
+ "FROM",
2124
+ "student_course_attendance",
2125
+ "WHERE",
2126
+ "course_id",
2127
+ "=",
2128
+ "301"
2129
+ ],
2130
+ "query_toks_no_value": [
2131
+ "select",
2132
+ "student_id",
2133
+ "from",
2134
+ "student_course_attendance",
2135
+ "where",
2136
+ "course_id",
2137
+ "=",
2138
+ "value"
2139
+ ],
2140
+ "question_toks": [
2141
+ "What",
2142
+ "are",
2143
+ "the",
2144
+ "id",
2145
+ "of",
2146
+ "students",
2147
+ "who",
2148
+ "registered",
2149
+ "course",
2150
+ "301",
2151
+ "?"
2152
+ ]
2153
+ },
2154
+ {
2155
+ "db_id": "student_assessment",
2156
+ "query": "SELECT student_id FROM student_course_attendance WHERE course_id = 301",
2157
+ "question": "What are the ids of the students who registered for course 301?",
2158
+ "query_toks": [
2159
+ "SELECT",
2160
+ "student_id",
2161
+ "FROM",
2162
+ "student_course_attendance",
2163
+ "WHERE",
2164
+ "course_id",
2165
+ "=",
2166
+ "301"
2167
+ ],
2168
+ "query_toks_no_value": [
2169
+ "select",
2170
+ "student_id",
2171
+ "from",
2172
+ "student_course_attendance",
2173
+ "where",
2174
+ "course_id",
2175
+ "=",
2176
+ "value"
2177
+ ],
2178
+ "question_toks": [
2179
+ "What",
2180
+ "are",
2181
+ "the",
2182
+ "ids",
2183
+ "of",
2184
+ "the",
2185
+ "students",
2186
+ "who",
2187
+ "registered",
2188
+ "for",
2189
+ "course",
2190
+ "301",
2191
+ "?"
2192
+ ]
2193
+ },
2194
+ {
2195
+ "db_id": "student_assessment",
2196
+ "query": "SELECT student_id FROM student_course_attendance WHERE course_id = 301 ORDER BY date_of_attendance DESC LIMIT 1",
2197
+ "question": "What is the id of the student who most recently registered course 301?",
2198
+ "query_toks": [
2199
+ "SELECT",
2200
+ "student_id",
2201
+ "FROM",
2202
+ "student_course_attendance",
2203
+ "WHERE",
2204
+ "course_id",
2205
+ "=",
2206
+ "301",
2207
+ "ORDER",
2208
+ "BY",
2209
+ "date_of_attendance",
2210
+ "DESC",
2211
+ "LIMIT",
2212
+ "1"
2213
+ ],
2214
+ "query_toks_no_value": [
2215
+ "select",
2216
+ "student_id",
2217
+ "from",
2218
+ "student_course_attendance",
2219
+ "where",
2220
+ "course_id",
2221
+ "=",
2222
+ "value",
2223
+ "order",
2224
+ "by",
2225
+ "date_of_attendance",
2226
+ "desc",
2227
+ "limit",
2228
+ "value"
2229
+ ],
2230
+ "question_toks": [
2231
+ "What",
2232
+ "is",
2233
+ "the",
2234
+ "id",
2235
+ "of",
2236
+ "the",
2237
+ "student",
2238
+ "who",
2239
+ "most",
2240
+ "recently",
2241
+ "registered",
2242
+ "course",
2243
+ "301",
2244
+ "?"
2245
+ ]
2246
+ },
2247
+ {
2248
+ "db_id": "student_assessment",
2249
+ "query": "SELECT student_id FROM student_course_attendance WHERE course_id = 301 ORDER BY date_of_attendance DESC LIMIT 1",
2250
+ "question": "What are the ids of the students who registered for course 301 most recently?",
2251
+ "query_toks": [
2252
+ "SELECT",
2253
+ "student_id",
2254
+ "FROM",
2255
+ "student_course_attendance",
2256
+ "WHERE",
2257
+ "course_id",
2258
+ "=",
2259
+ "301",
2260
+ "ORDER",
2261
+ "BY",
2262
+ "date_of_attendance",
2263
+ "DESC",
2264
+ "LIMIT",
2265
+ "1"
2266
+ ],
2267
+ "query_toks_no_value": [
2268
+ "select",
2269
+ "student_id",
2270
+ "from",
2271
+ "student_course_attendance",
2272
+ "where",
2273
+ "course_id",
2274
+ "=",
2275
+ "value",
2276
+ "order",
2277
+ "by",
2278
+ "date_of_attendance",
2279
+ "desc",
2280
+ "limit",
2281
+ "value"
2282
+ ],
2283
+ "question_toks": [
2284
+ "What",
2285
+ "are",
2286
+ "the",
2287
+ "ids",
2288
+ "of",
2289
+ "the",
2290
+ "students",
2291
+ "who",
2292
+ "registered",
2293
+ "for",
2294
+ "course",
2295
+ "301",
2296
+ "most",
2297
+ "recently",
2298
+ "?"
2299
+ ]
2300
+ },
2301
+ {
2302
+ "db_id": "student_assessment",
2303
+ "query": "SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id",
2304
+ "question": "Find distinct cities of addresses of people?",
2305
+ "query_toks": [
2306
+ "SELECT",
2307
+ "DISTINCT",
2308
+ "T1.city",
2309
+ "FROM",
2310
+ "addresses",
2311
+ "AS",
2312
+ "T1",
2313
+ "JOIN",
2314
+ "people_addresses",
2315
+ "AS",
2316
+ "T2",
2317
+ "ON",
2318
+ "T1.address_id",
2319
+ "=",
2320
+ "T2.address_id"
2321
+ ],
2322
+ "query_toks_no_value": [
2323
+ "select",
2324
+ "distinct",
2325
+ "t1",
2326
+ ".",
2327
+ "city",
2328
+ "from",
2329
+ "addresses",
2330
+ "as",
2331
+ "t1",
2332
+ "join",
2333
+ "people_addresses",
2334
+ "as",
2335
+ "t2",
2336
+ "on",
2337
+ "t1",
2338
+ ".",
2339
+ "address_id",
2340
+ "=",
2341
+ "t2",
2342
+ ".",
2343
+ "address_id"
2344
+ ],
2345
+ "question_toks": [
2346
+ "Find",
2347
+ "distinct",
2348
+ "cities",
2349
+ "of",
2350
+ "addresses",
2351
+ "of",
2352
+ "people",
2353
+ "?"
2354
+ ]
2355
+ },
2356
+ {
2357
+ "db_id": "student_assessment",
2358
+ "query": "SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id",
2359
+ "question": "What are the different cities where people live?",
2360
+ "query_toks": [
2361
+ "SELECT",
2362
+ "DISTINCT",
2363
+ "T1.city",
2364
+ "FROM",
2365
+ "addresses",
2366
+ "AS",
2367
+ "T1",
2368
+ "JOIN",
2369
+ "people_addresses",
2370
+ "AS",
2371
+ "T2",
2372
+ "ON",
2373
+ "T1.address_id",
2374
+ "=",
2375
+ "T2.address_id"
2376
+ ],
2377
+ "query_toks_no_value": [
2378
+ "select",
2379
+ "distinct",
2380
+ "t1",
2381
+ ".",
2382
+ "city",
2383
+ "from",
2384
+ "addresses",
2385
+ "as",
2386
+ "t1",
2387
+ "join",
2388
+ "people_addresses",
2389
+ "as",
2390
+ "t2",
2391
+ "on",
2392
+ "t1",
2393
+ ".",
2394
+ "address_id",
2395
+ "=",
2396
+ "t2",
2397
+ ".",
2398
+ "address_id"
2399
+ ],
2400
+ "question_toks": [
2401
+ "What",
2402
+ "are",
2403
+ "the",
2404
+ "different",
2405
+ "cities",
2406
+ "where",
2407
+ "people",
2408
+ "live",
2409
+ "?"
2410
+ ]
2411
+ },
2412
+ {
2413
+ "db_id": "student_assessment",
2414
+ "query": "SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id JOIN students AS T3 ON T2.person_id = T3.student_id",
2415
+ "question": "Find distinct cities of address of students?",
2416
+ "query_toks": [
2417
+ "SELECT",
2418
+ "DISTINCT",
2419
+ "T1.city",
2420
+ "FROM",
2421
+ "addresses",
2422
+ "AS",
2423
+ "T1",
2424
+ "JOIN",
2425
+ "people_addresses",
2426
+ "AS",
2427
+ "T2",
2428
+ "ON",
2429
+ "T1.address_id",
2430
+ "=",
2431
+ "T2.address_id",
2432
+ "JOIN",
2433
+ "students",
2434
+ "AS",
2435
+ "T3",
2436
+ "ON",
2437
+ "T2.person_id",
2438
+ "=",
2439
+ "T3.student_id"
2440
+ ],
2441
+ "query_toks_no_value": [
2442
+ "select",
2443
+ "distinct",
2444
+ "t1",
2445
+ ".",
2446
+ "city",
2447
+ "from",
2448
+ "addresses",
2449
+ "as",
2450
+ "t1",
2451
+ "join",
2452
+ "people_addresses",
2453
+ "as",
2454
+ "t2",
2455
+ "on",
2456
+ "t1",
2457
+ ".",
2458
+ "address_id",
2459
+ "=",
2460
+ "t2",
2461
+ ".",
2462
+ "address_id",
2463
+ "join",
2464
+ "students",
2465
+ "as",
2466
+ "t3",
2467
+ "on",
2468
+ "t2",
2469
+ ".",
2470
+ "person_id",
2471
+ "=",
2472
+ "t3",
2473
+ ".",
2474
+ "student_id"
2475
+ ],
2476
+ "question_toks": [
2477
+ "Find",
2478
+ "distinct",
2479
+ "cities",
2480
+ "of",
2481
+ "address",
2482
+ "of",
2483
+ "students",
2484
+ "?"
2485
+ ]
2486
+ },
2487
+ {
2488
+ "db_id": "student_assessment",
2489
+ "query": "SELECT DISTINCT T1.city FROM addresses AS T1 JOIN people_addresses AS T2 ON T1.address_id = T2.address_id JOIN students AS T3 ON T2.person_id = T3.student_id",
2490
+ "question": "What are the different cities where students live?",
2491
+ "query_toks": [
2492
+ "SELECT",
2493
+ "DISTINCT",
2494
+ "T1.city",
2495
+ "FROM",
2496
+ "addresses",
2497
+ "AS",
2498
+ "T1",
2499
+ "JOIN",
2500
+ "people_addresses",
2501
+ "AS",
2502
+ "T2",
2503
+ "ON",
2504
+ "T1.address_id",
2505
+ "=",
2506
+ "T2.address_id",
2507
+ "JOIN",
2508
+ "students",
2509
+ "AS",
2510
+ "T3",
2511
+ "ON",
2512
+ "T2.person_id",
2513
+ "=",
2514
+ "T3.student_id"
2515
+ ],
2516
+ "query_toks_no_value": [
2517
+ "select",
2518
+ "distinct",
2519
+ "t1",
2520
+ ".",
2521
+ "city",
2522
+ "from",
2523
+ "addresses",
2524
+ "as",
2525
+ "t1",
2526
+ "join",
2527
+ "people_addresses",
2528
+ "as",
2529
+ "t2",
2530
+ "on",
2531
+ "t1",
2532
+ ".",
2533
+ "address_id",
2534
+ "=",
2535
+ "t2",
2536
+ ".",
2537
+ "address_id",
2538
+ "join",
2539
+ "students",
2540
+ "as",
2541
+ "t3",
2542
+ "on",
2543
+ "t2",
2544
+ ".",
2545
+ "person_id",
2546
+ "=",
2547
+ "t3",
2548
+ ".",
2549
+ "student_id"
2550
+ ],
2551
+ "question_toks": [
2552
+ "What",
2553
+ "are",
2554
+ "the",
2555
+ "different",
2556
+ "cities",
2557
+ "where",
2558
+ "students",
2559
+ "live",
2560
+ "?"
2561
+ ]
2562
+ },
2563
+ {
2564
+ "db_id": "student_assessment",
2565
+ "query": "SELECT course_name FROM courses ORDER BY course_name",
2566
+ "question": "List the names of courses in alphabetical order?",
2567
+ "query_toks": [
2568
+ "SELECT",
2569
+ "course_name",
2570
+ "FROM",
2571
+ "courses",
2572
+ "ORDER",
2573
+ "BY",
2574
+ "course_name"
2575
+ ],
2576
+ "query_toks_no_value": [
2577
+ "select",
2578
+ "course_name",
2579
+ "from",
2580
+ "courses",
2581
+ "order",
2582
+ "by",
2583
+ "course_name"
2584
+ ],
2585
+ "question_toks": [
2586
+ "List",
2587
+ "the",
2588
+ "names",
2589
+ "of",
2590
+ "courses",
2591
+ "in",
2592
+ "alphabetical",
2593
+ "order",
2594
+ "?"
2595
+ ]
2596
+ },
2597
+ {
2598
+ "db_id": "student_assessment",
2599
+ "query": "SELECT course_name FROM courses ORDER BY course_name",
2600
+ "question": "What are the names of the courses in alphabetical order?",
2601
+ "query_toks": [
2602
+ "SELECT",
2603
+ "course_name",
2604
+ "FROM",
2605
+ "courses",
2606
+ "ORDER",
2607
+ "BY",
2608
+ "course_name"
2609
+ ],
2610
+ "query_toks_no_value": [
2611
+ "select",
2612
+ "course_name",
2613
+ "from",
2614
+ "courses",
2615
+ "order",
2616
+ "by",
2617
+ "course_name"
2618
+ ],
2619
+ "question_toks": [
2620
+ "What",
2621
+ "are",
2622
+ "the",
2623
+ "names",
2624
+ "of",
2625
+ "the",
2626
+ "courses",
2627
+ "in",
2628
+ "alphabetical",
2629
+ "order",
2630
+ "?"
2631
+ ]
2632
+ },
2633
+ {
2634
+ "db_id": "student_assessment",
2635
+ "query": "SELECT first_name FROM people ORDER BY first_name",
2636
+ "question": "List the first names of people in alphabetical order?",
2637
+ "query_toks": [
2638
+ "SELECT",
2639
+ "first_name",
2640
+ "FROM",
2641
+ "people",
2642
+ "ORDER",
2643
+ "BY",
2644
+ "first_name"
2645
+ ],
2646
+ "query_toks_no_value": [
2647
+ "select",
2648
+ "first_name",
2649
+ "from",
2650
+ "people",
2651
+ "order",
2652
+ "by",
2653
+ "first_name"
2654
+ ],
2655
+ "question_toks": [
2656
+ "List",
2657
+ "the",
2658
+ "first",
2659
+ "names",
2660
+ "of",
2661
+ "people",
2662
+ "in",
2663
+ "alphabetical",
2664
+ "order",
2665
+ "?"
2666
+ ]
2667
+ },
2668
+ {
2669
+ "db_id": "student_assessment",
2670
+ "query": "SELECT first_name FROM people ORDER BY first_name",
2671
+ "question": "What are the first names of the people in alphabetical order?",
2672
+ "query_toks": [
2673
+ "SELECT",
2674
+ "first_name",
2675
+ "FROM",
2676
+ "people",
2677
+ "ORDER",
2678
+ "BY",
2679
+ "first_name"
2680
+ ],
2681
+ "query_toks_no_value": [
2682
+ "select",
2683
+ "first_name",
2684
+ "from",
2685
+ "people",
2686
+ "order",
2687
+ "by",
2688
+ "first_name"
2689
+ ],
2690
+ "question_toks": [
2691
+ "What",
2692
+ "are",
2693
+ "the",
2694
+ "first",
2695
+ "names",
2696
+ "of",
2697
+ "the",
2698
+ "people",
2699
+ "in",
2700
+ "alphabetical",
2701
+ "order",
2702
+ "?"
2703
+ ]
2704
+ },
2705
+ {
2706
+ "db_id": "student_assessment",
2707
+ "query": "SELECT student_id FROM student_course_registrations UNION SELECT student_id FROM student_course_attendance",
2708
+ "question": "What are the id of students who registered courses or attended courses?",
2709
+ "query_toks": [
2710
+ "SELECT",
2711
+ "student_id",
2712
+ "FROM",
2713
+ "student_course_registrations",
2714
+ "UNION",
2715
+ "SELECT",
2716
+ "student_id",
2717
+ "FROM",
2718
+ "student_course_attendance"
2719
+ ],
2720
+ "query_toks_no_value": [
2721
+ "select",
2722
+ "student_id",
2723
+ "from",
2724
+ "student_course_registrations",
2725
+ "union",
2726
+ "select",
2727
+ "student_id",
2728
+ "from",
2729
+ "student_course_attendance"
2730
+ ],
2731
+ "question_toks": [
2732
+ "What",
2733
+ "are",
2734
+ "the",
2735
+ "id",
2736
+ "of",
2737
+ "students",
2738
+ "who",
2739
+ "registered",
2740
+ "courses",
2741
+ "or",
2742
+ "attended",
2743
+ "courses",
2744
+ "?"
2745
+ ]
2746
+ },
2747
+ {
2748
+ "db_id": "student_assessment",
2749
+ "query": "SELECT student_id FROM student_course_registrations UNION SELECT student_id FROM student_course_attendance",
2750
+ "question": "What are the ids of the students who either registered or attended a course?",
2751
+ "query_toks": [
2752
+ "SELECT",
2753
+ "student_id",
2754
+ "FROM",
2755
+ "student_course_registrations",
2756
+ "UNION",
2757
+ "SELECT",
2758
+ "student_id",
2759
+ "FROM",
2760
+ "student_course_attendance"
2761
+ ],
2762
+ "query_toks_no_value": [
2763
+ "select",
2764
+ "student_id",
2765
+ "from",
2766
+ "student_course_registrations",
2767
+ "union",
2768
+ "select",
2769
+ "student_id",
2770
+ "from",
2771
+ "student_course_attendance"
2772
+ ],
2773
+ "question_toks": [
2774
+ "What",
2775
+ "are",
2776
+ "the",
2777
+ "ids",
2778
+ "of",
2779
+ "the",
2780
+ "students",
2781
+ "who",
2782
+ "either",
2783
+ "registered",
2784
+ "or",
2785
+ "attended",
2786
+ "a",
2787
+ "course",
2788
+ "?"
2789
+ ]
2790
+ },
2791
+ {
2792
+ "db_id": "student_assessment",
2793
+ "query": "SELECT course_id FROM student_course_registrations WHERE student_id = 121 UNION SELECT course_id FROM student_course_attendance WHERE student_id = 121",
2794
+ "question": "Find the id of courses which are registered or attended by student whose id is 121?",
2795
+ "query_toks": [
2796
+ "SELECT",
2797
+ "course_id",
2798
+ "FROM",
2799
+ "student_course_registrations",
2800
+ "WHERE",
2801
+ "student_id",
2802
+ "=",
2803
+ "121",
2804
+ "UNION",
2805
+ "SELECT",
2806
+ "course_id",
2807
+ "FROM",
2808
+ "student_course_attendance",
2809
+ "WHERE",
2810
+ "student_id",
2811
+ "=",
2812
+ "121"
2813
+ ],
2814
+ "query_toks_no_value": [
2815
+ "select",
2816
+ "course_id",
2817
+ "from",
2818
+ "student_course_registrations",
2819
+ "where",
2820
+ "student_id",
2821
+ "=",
2822
+ "value",
2823
+ "union",
2824
+ "select",
2825
+ "course_id",
2826
+ "from",
2827
+ "student_course_attendance",
2828
+ "where",
2829
+ "student_id",
2830
+ "=",
2831
+ "value"
2832
+ ],
2833
+ "question_toks": [
2834
+ "Find",
2835
+ "the",
2836
+ "id",
2837
+ "of",
2838
+ "courses",
2839
+ "which",
2840
+ "are",
2841
+ "registered",
2842
+ "or",
2843
+ "attended",
2844
+ "by",
2845
+ "student",
2846
+ "whose",
2847
+ "id",
2848
+ "is",
2849
+ "121",
2850
+ "?"
2851
+ ]
2852
+ },
2853
+ {
2854
+ "db_id": "student_assessment",
2855
+ "query": "SELECT course_id FROM student_course_registrations WHERE student_id = 121 UNION SELECT course_id FROM student_course_attendance WHERE student_id = 121",
2856
+ "question": "What are the ids of the courses that are registered or attended by the student whose id is 121?",
2857
+ "query_toks": [
2858
+ "SELECT",
2859
+ "course_id",
2860
+ "FROM",
2861
+ "student_course_registrations",
2862
+ "WHERE",
2863
+ "student_id",
2864
+ "=",
2865
+ "121",
2866
+ "UNION",
2867
+ "SELECT",
2868
+ "course_id",
2869
+ "FROM",
2870
+ "student_course_attendance",
2871
+ "WHERE",
2872
+ "student_id",
2873
+ "=",
2874
+ "121"
2875
+ ],
2876
+ "query_toks_no_value": [
2877
+ "select",
2878
+ "course_id",
2879
+ "from",
2880
+ "student_course_registrations",
2881
+ "where",
2882
+ "student_id",
2883
+ "=",
2884
+ "value",
2885
+ "union",
2886
+ "select",
2887
+ "course_id",
2888
+ "from",
2889
+ "student_course_attendance",
2890
+ "where",
2891
+ "student_id",
2892
+ "=",
2893
+ "value"
2894
+ ],
2895
+ "question_toks": [
2896
+ "What",
2897
+ "are",
2898
+ "the",
2899
+ "ids",
2900
+ "of",
2901
+ "the",
2902
+ "courses",
2903
+ "that",
2904
+ "are",
2905
+ "registered",
2906
+ "or",
2907
+ "attended",
2908
+ "by",
2909
+ "the",
2910
+ "student",
2911
+ "whose",
2912
+ "id",
2913
+ "is",
2914
+ "121",
2915
+ "?"
2916
+ ]
2917
+ },
2918
+ {
2919
+ "db_id": "student_assessment",
2920
+ "query": "SELECT * FROM student_course_registrations WHERE student_id NOT IN (SELECT student_id FROM student_course_attendance)",
2921
+ "question": "What are all info of students who registered courses but not attended courses?",
2922
+ "query_toks": [
2923
+ "SELECT",
2924
+ "*",
2925
+ "FROM",
2926
+ "student_course_registrations",
2927
+ "WHERE",
2928
+ "student_id",
2929
+ "NOT",
2930
+ "IN",
2931
+ "(",
2932
+ "SELECT",
2933
+ "student_id",
2934
+ "FROM",
2935
+ "student_course_attendance",
2936
+ ")"
2937
+ ],
2938
+ "query_toks_no_value": [
2939
+ "select",
2940
+ "*",
2941
+ "from",
2942
+ "student_course_registrations",
2943
+ "where",
2944
+ "student_id",
2945
+ "not",
2946
+ "in",
2947
+ "(",
2948
+ "select",
2949
+ "student_id",
2950
+ "from",
2951
+ "student_course_attendance",
2952
+ ")"
2953
+ ],
2954
+ "question_toks": [
2955
+ "What",
2956
+ "are",
2957
+ "all",
2958
+ "info",
2959
+ "of",
2960
+ "students",
2961
+ "who",
2962
+ "registered",
2963
+ "courses",
2964
+ "but",
2965
+ "not",
2966
+ "attended",
2967
+ "courses",
2968
+ "?"
2969
+ ]
2970
+ },
2971
+ {
2972
+ "db_id": "student_assessment",
2973
+ "query": "SELECT * FROM student_course_registrations WHERE student_id NOT IN (SELECT student_id FROM student_course_attendance)",
2974
+ "question": "What are all details of the students who registered but did not attend any course?",
2975
+ "query_toks": [
2976
+ "SELECT",
2977
+ "*",
2978
+ "FROM",
2979
+ "student_course_registrations",
2980
+ "WHERE",
2981
+ "student_id",
2982
+ "NOT",
2983
+ "IN",
2984
+ "(",
2985
+ "SELECT",
2986
+ "student_id",
2987
+ "FROM",
2988
+ "student_course_attendance",
2989
+ ")"
2990
+ ],
2991
+ "query_toks_no_value": [
2992
+ "select",
2993
+ "*",
2994
+ "from",
2995
+ "student_course_registrations",
2996
+ "where",
2997
+ "student_id",
2998
+ "not",
2999
+ "in",
3000
+ "(",
3001
+ "select",
3002
+ "student_id",
3003
+ "from",
3004
+ "student_course_attendance",
3005
+ ")"
3006
+ ],
3007
+ "question_toks": [
3008
+ "What",
3009
+ "are",
3010
+ "all",
3011
+ "details",
3012
+ "of",
3013
+ "the",
3014
+ "students",
3015
+ "who",
3016
+ "registered",
3017
+ "but",
3018
+ "did",
3019
+ "not",
3020
+ "attend",
3021
+ "any",
3022
+ "course",
3023
+ "?"
3024
+ ]
3025
+ },
3026
+ {
3027
+ "db_id": "student_assessment",
3028
+ "query": "SELECT T2.student_id FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"statistics\" ORDER BY T2.registration_date",
3029
+ "question": "List the id of students who registered course statistics in the order of registration date.",
3030
+ "query_toks": [
3031
+ "SELECT",
3032
+ "T2.student_id",
3033
+ "FROM",
3034
+ "courses",
3035
+ "AS",
3036
+ "T1",
3037
+ "JOIN",
3038
+ "student_course_registrations",
3039
+ "AS",
3040
+ "T2",
3041
+ "ON",
3042
+ "T1.course_id",
3043
+ "=",
3044
+ "T2.course_id",
3045
+ "WHERE",
3046
+ "T1.course_name",
3047
+ "=",
3048
+ "``",
3049
+ "statistics",
3050
+ "''",
3051
+ "ORDER",
3052
+ "BY",
3053
+ "T2.registration_date"
3054
+ ],
3055
+ "query_toks_no_value": [
3056
+ "select",
3057
+ "t2",
3058
+ ".",
3059
+ "student_id",
3060
+ "from",
3061
+ "courses",
3062
+ "as",
3063
+ "t1",
3064
+ "join",
3065
+ "student_course_registrations",
3066
+ "as",
3067
+ "t2",
3068
+ "on",
3069
+ "t1",
3070
+ ".",
3071
+ "course_id",
3072
+ "=",
3073
+ "t2",
3074
+ ".",
3075
+ "course_id",
3076
+ "where",
3077
+ "t1",
3078
+ ".",
3079
+ "course_name",
3080
+ "=",
3081
+ "value",
3082
+ "order",
3083
+ "by",
3084
+ "t2",
3085
+ ".",
3086
+ "registration_date"
3087
+ ],
3088
+ "question_toks": [
3089
+ "List",
3090
+ "the",
3091
+ "id",
3092
+ "of",
3093
+ "students",
3094
+ "who",
3095
+ "registered",
3096
+ "course",
3097
+ "statistics",
3098
+ "in",
3099
+ "the",
3100
+ "order",
3101
+ "of",
3102
+ "registration",
3103
+ "date",
3104
+ "."
3105
+ ]
3106
+ },
3107
+ {
3108
+ "db_id": "student_assessment",
3109
+ "query": "SELECT T2.student_id FROM courses AS T1 JOIN student_course_registrations AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"statistics\" ORDER BY T2.registration_date",
3110
+ "question": "What are the ids of the students who registered course statistics by order of registration date?",
3111
+ "query_toks": [
3112
+ "SELECT",
3113
+ "T2.student_id",
3114
+ "FROM",
3115
+ "courses",
3116
+ "AS",
3117
+ "T1",
3118
+ "JOIN",
3119
+ "student_course_registrations",
3120
+ "AS",
3121
+ "T2",
3122
+ "ON",
3123
+ "T1.course_id",
3124
+ "=",
3125
+ "T2.course_id",
3126
+ "WHERE",
3127
+ "T1.course_name",
3128
+ "=",
3129
+ "``",
3130
+ "statistics",
3131
+ "''",
3132
+ "ORDER",
3133
+ "BY",
3134
+ "T2.registration_date"
3135
+ ],
3136
+ "query_toks_no_value": [
3137
+ "select",
3138
+ "t2",
3139
+ ".",
3140
+ "student_id",
3141
+ "from",
3142
+ "courses",
3143
+ "as",
3144
+ "t1",
3145
+ "join",
3146
+ "student_course_registrations",
3147
+ "as",
3148
+ "t2",
3149
+ "on",
3150
+ "t1",
3151
+ ".",
3152
+ "course_id",
3153
+ "=",
3154
+ "t2",
3155
+ ".",
3156
+ "course_id",
3157
+ "where",
3158
+ "t1",
3159
+ ".",
3160
+ "course_name",
3161
+ "=",
3162
+ "value",
3163
+ "order",
3164
+ "by",
3165
+ "t2",
3166
+ ".",
3167
+ "registration_date"
3168
+ ],
3169
+ "question_toks": [
3170
+ "What",
3171
+ "are",
3172
+ "the",
3173
+ "ids",
3174
+ "of",
3175
+ "the",
3176
+ "students",
3177
+ "who",
3178
+ "registered",
3179
+ "course",
3180
+ "statistics",
3181
+ "by",
3182
+ "order",
3183
+ "of",
3184
+ "registration",
3185
+ "date",
3186
+ "?"
3187
+ ]
3188
+ },
3189
+ {
3190
+ "db_id": "student_assessment",
3191
+ "query": "SELECT T2.student_id FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"statistics\" ORDER BY T2.date_of_attendance",
3192
+ "question": "List the id of students who attended statistics courses in the order of attendance date.",
3193
+ "query_toks": [
3194
+ "SELECT",
3195
+ "T2.student_id",
3196
+ "FROM",
3197
+ "courses",
3198
+ "AS",
3199
+ "T1",
3200
+ "JOIN",
3201
+ "student_course_attendance",
3202
+ "AS",
3203
+ "T2",
3204
+ "ON",
3205
+ "T1.course_id",
3206
+ "=",
3207
+ "T2.course_id",
3208
+ "WHERE",
3209
+ "T1.course_name",
3210
+ "=",
3211
+ "``",
3212
+ "statistics",
3213
+ "''",
3214
+ "ORDER",
3215
+ "BY",
3216
+ "T2.date_of_attendance"
3217
+ ],
3218
+ "query_toks_no_value": [
3219
+ "select",
3220
+ "t2",
3221
+ ".",
3222
+ "student_id",
3223
+ "from",
3224
+ "courses",
3225
+ "as",
3226
+ "t1",
3227
+ "join",
3228
+ "student_course_attendance",
3229
+ "as",
3230
+ "t2",
3231
+ "on",
3232
+ "t1",
3233
+ ".",
3234
+ "course_id",
3235
+ "=",
3236
+ "t2",
3237
+ ".",
3238
+ "course_id",
3239
+ "where",
3240
+ "t1",
3241
+ ".",
3242
+ "course_name",
3243
+ "=",
3244
+ "value",
3245
+ "order",
3246
+ "by",
3247
+ "t2",
3248
+ ".",
3249
+ "date_of_attendance"
3250
+ ],
3251
+ "question_toks": [
3252
+ "List",
3253
+ "the",
3254
+ "id",
3255
+ "of",
3256
+ "students",
3257
+ "who",
3258
+ "attended",
3259
+ "statistics",
3260
+ "courses",
3261
+ "in",
3262
+ "the",
3263
+ "order",
3264
+ "of",
3265
+ "attendance",
3266
+ "date",
3267
+ "."
3268
+ ]
3269
+ },
3270
+ {
3271
+ "db_id": "student_assessment",
3272
+ "query": "SELECT T2.student_id FROM courses AS T1 JOIN student_course_attendance AS T2 ON T1.course_id = T2.course_id WHERE T1.course_name = \"statistics\" ORDER BY T2.date_of_attendance",
3273
+ "question": "What are the ids of the students who attended courses in the statistics department in order of attendance date.",
3274
+ "query_toks": [
3275
+ "SELECT",
3276
+ "T2.student_id",
3277
+ "FROM",
3278
+ "courses",
3279
+ "AS",
3280
+ "T1",
3281
+ "JOIN",
3282
+ "student_course_attendance",
3283
+ "AS",
3284
+ "T2",
3285
+ "ON",
3286
+ "T1.course_id",
3287
+ "=",
3288
+ "T2.course_id",
3289
+ "WHERE",
3290
+ "T1.course_name",
3291
+ "=",
3292
+ "``",
3293
+ "statistics",
3294
+ "''",
3295
+ "ORDER",
3296
+ "BY",
3297
+ "T2.date_of_attendance"
3298
+ ],
3299
+ "query_toks_no_value": [
3300
+ "select",
3301
+ "t2",
3302
+ ".",
3303
+ "student_id",
3304
+ "from",
3305
+ "courses",
3306
+ "as",
3307
+ "t1",
3308
+ "join",
3309
+ "student_course_attendance",
3310
+ "as",
3311
+ "t2",
3312
+ "on",
3313
+ "t1",
3314
+ ".",
3315
+ "course_id",
3316
+ "=",
3317
+ "t2",
3318
+ ".",
3319
+ "course_id",
3320
+ "where",
3321
+ "t1",
3322
+ ".",
3323
+ "course_name",
3324
+ "=",
3325
+ "value",
3326
+ "order",
3327
+ "by",
3328
+ "t2",
3329
+ ".",
3330
+ "date_of_attendance"
3331
+ ],
3332
+ "question_toks": [
3333
+ "What",
3334
+ "are",
3335
+ "the",
3336
+ "ids",
3337
+ "of",
3338
+ "the",
3339
+ "students",
3340
+ "who",
3341
+ "attended",
3342
+ "courses",
3343
+ "in",
3344
+ "the",
3345
+ "statistics",
3346
+ "department",
3347
+ "in",
3348
+ "order",
3349
+ "of",
3350
+ "attendance",
3351
+ "date",
3352
+ "."
3353
+ ]
3354
+ }
3355
+ ]
docs/ARCHITECTURE.md ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Architecture
2
+
3
+ > Last updated: 2026-02-28
4
+
5
+ System map for SQLEnv — an RL environment where agents learn interactive SQL exploration via the OpenEnv framework.
6
+
7
+ **Goals:**
8
+ - Show how components connect (system map + key flows)
9
+ - Make hidden state explicit (what lives where)
10
+ - Define shared interfaces (Pydantic models, WebSocket API)
11
+ - Keep invariants legible (what must stay true)
12
+
13
+ **Non-goals:**
14
+ - CLI reference (see `docs/RUNBOOK.md`)
15
+ - Per-feature implementation details (link to specs)
16
+
17
+ ---
18
+
19
+ ## System Map
20
+
21
+ ```text
22
+ SQLEnv System
23
+ ================================================================
24
+
25
+ RL Training Loop SQLEnv Server (Docker)
26
+ ---------------- ----------------------
27
+ +---------------------+
28
+ +------------+ WebSocket (JSON) | server/app.py |
29
+ | SQLEnv |<=========================>| FastAPI + WS |
30
+ | Client | SQLAction -> server | |
31
+ | (client.py)| SQLObs <- server +----------+----------+
32
+ +-----+------+ |
33
+ | v
34
+ | tensor <-> list +---------------------+
35
+ | serialization | SQLEnvironment |
36
+ | | (sql_environment.py)|
37
+ +-----v------+ | |
38
+ | RL Agent | | - reset() / step() |
39
+ | (external) | | - action detection |
40
+ | e.g. GRPO | | - message_to_action |
41
+ +------------+ +--+-------+-------+--+
42
+ | | |
43
+ v v v
44
+ +------+ +------+ +--------+
45
+ |Schema| |Sample| | Query |
46
+ |Intro-| |Gen | | (Ollama|
47
+ |spect.| | | | LLM) |
48
+ +--+---+ +--+---+ +---+----+
49
+ | | |
50
+ v v v
51
+ +-------------------------+
52
+ | SQLAlchemy ORM Models |
53
+ | (data/databases/ |
54
+ | models.py) |
55
+ | 9 tables: |
56
+ | Address, Person, |
57
+ | Student, Course, ... |
58
+ +-------------------------+
59
+
60
+ Data (committed) External (optional)
61
+ ---------------- -------------------
62
+ data/questions/ +----------+
63
+ student_assessment.json | Ollama |
64
+ (53 Spider Q&A pairs) | LLM API |
65
+ | :11434 |
66
+ +----------+
67
+ ```
68
+
69
+ ---
70
+
71
+ ## Component Inventory
72
+
73
+ | Component | Owns | Entrypoint | State / Output |
74
+ |-----------|------|------------|----------------|
75
+ | **SQLEnvClient** | WebSocket transport, tensor serialization | `client.py` | Stateless (wraps server) |
76
+ | **FastAPI app** | HTTP/WS endpoints, tokenizer factory | `server/app.py` | In-memory tokenizer |
77
+ | **SQLEnvironment** | Episode lifecycle, action dispatch, state | `server/sql_environment.py` | `SQLState` (in-memory) |
78
+ | **Pydantic models** | Type contracts (action, observation, state) | `models.py` | N/A (data classes) |
79
+ | **ORM models** | Database schema definition | `data/databases/models.py` | SQLAlchemy metadata |
80
+ | **Spider data** | Question-answer pairs | `data/questions/student_assessment.json` | 53 Q&A entries |
81
+ | **MockTokenizer** | Dev/test tokenization (no GPU needed) | `server/test_sql_env.py` | Deterministic (ord/chr) |
82
+
83
+ ### External Services
84
+
85
+ | Service | Purpose | Required | Fallback |
86
+ |---------|---------|----------|----------|
87
+ | Ollama (`localhost:11434`) | Table selection + SQL generation | No | First table in dict; query returns error string |
88
+
89
+ ---
90
+
91
+ ## Key Flows
92
+
93
+ ### Flow: Episode (Reset + Multi-Turn Steps)
94
+
95
+ ```text
96
+ Client Server (SQLEnvironment) Ollama
97
+ | | |
98
+ |--- reset() ----------------->| |
99
+ | |-- init state, system prompt |
100
+ | |-- tokenize system message |
101
+ |<-- SQLObservation -----------| (MockTokenizer or HF) |
102
+ | .messages=[system] | |
103
+ | .tokens=shape([N]) | |
104
+ | | |
105
+ |--- message_to_action(msg) -->| |
106
+ | |-- detect action type |
107
+ | | (keyword matching) |
108
+ | |-- append msg to history |
109
+ | |-- tokenize full conversation |
110
+ |<-- SQLAction ----------------| |
111
+ | .action_type="describe" | |
112
+ | .tokens=shape([1,M]) | |
113
+ | | |
114
+ |--- step(action) ------------>| |
115
+ | |-- select table -------------->|
116
+ | |<-- table name (or fallback) --|
117
+ | |-- introspect ORM schema |
118
+ | |-- append assistant msg |
119
+ | |-- append action tokens |
120
+ |<-- SQLObservation -----------| |
121
+ | .messages=[sys,usr,asst] | |
122
+ | .tokens=shape([N+M+K]) | |
123
+ | | |
124
+ (repeat step() for sample, query, answer...)
125
+ ```
126
+
127
+ ### Flow: Action Detection
128
+
129
+ ```text
130
+ User message string
131
+ |
132
+ v
133
+ _detect_action_type(content)
134
+ |
135
+ +-- contains "describe"/"schema"/"columns"? --> "describe"
136
+ |
137
+ +-- contains "sample"/"example"/"rows"? --> "sample"
138
+ |
139
+ +-- default --> "query"
140
+ ```
141
+
142
+ ### Flow: Client Serialization (WebSocket Transport)
143
+
144
+ ```text
145
+ Client Server
146
+ | |
147
+ | _step_payload(action): |
148
+ | tokens: Tensor -> list (JSON-safe) |
149
+ | {action_type, action_description, |
150
+ | tokens: [[1,2,3,...]], metadata} |
151
+ | ---------------------------------------->|
152
+ | |
153
+ | _parse_result(data): |
154
+ | tokens: list -> Tensor |
155
+ | StepResult(obs, reward, done, info) |
156
+ | <----------------------------------------|
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Shared Data Models
162
+
163
+ These three Pydantic models are used across client, server, and tests.
164
+ Defined in `models.py`.
165
+
166
+ ### SQLAction
167
+
168
+ ```python
169
+ class SQLAction(Action):
170
+ action_type: str # "describe" | "sample" | "query" | "answer"
171
+ action_description: str # raw user message content
172
+ tokens: torch.Tensor # tokenized conversation context, shape [1, seq_len]
173
+ ```
174
+
175
+ **Used by:** SQLEnvironment.step(), SQLEnvClient._step_payload(), tests
176
+
177
+ ### SQLObservation
178
+
179
+ ```python
180
+ class SQLObservation(Observation):
181
+ messages: list[Message] # full conversation history [{role, content}, ...]
182
+ tokens: torch.Tensor # flattened 1D tensor of all turn tokens concatenated
183
+ ```
184
+
185
+ **Used by:** SQLEnvironment.reset()/step(), SQLEnvClient._parse_result(), tests
186
+
187
+ ### SQLState
188
+
189
+ ```python
190
+ class SQLState(State):
191
+ episode_id: str # UUID per episode
192
+ step_count: int # turns taken
193
+ history_messages: list[Message] # accumulates across turns
194
+ history_tokens: list[torch.Tensor] # one tensor per turn, flattened on output
195
+ current_action_type: str | None # last detected action type
196
+ ```
197
+
198
+ **Used by:** SQLEnvironment (internal), state endpoint
199
+ **Note:** This is a lightweight summary for logging. The full RL state lives inside SQLEnvironment and is not exposed to the agent.
200
+
201
+ ---
202
+
203
+ ## API Contracts
204
+
205
+ ### WebSocket (OpenEnv Protocol)
206
+
207
+ The server exposes a WebSocket endpoint via FastAPI. The OpenEnv framework handles the protocol — SQLEnv implements `reset()` and `step()` on the server side, and `SQLEnvClient` wraps the client side.
208
+
209
+ | Operation | Client Method | Payload | Response |
210
+ |-----------|---------------|---------|----------|
211
+ | Reset | `client.reset()` | `{}` | `SQLObservation` (JSON) |
212
+ | Step | `client.step(action)` | `{action_type, action_description, tokens: list, metadata}` | `StepResult(obs, reward, done, info)` |
213
+ | State | `client.state()` | `{}` | `SQLState` (JSON) |
214
+
215
+ ### Ollama (Optional)
216
+
217
+ | Endpoint | Purpose | Payload |
218
+ |----------|---------|---------|
219
+ | `POST /api/generate` | Table selection | `{model, prompt, stream: false}` |
220
+ | `POST /api/generate` | SQL generation | `{model, prompt, stream: false}` |
221
+
222
+ Timeout: 30s. Failure mode: graceful fallback (never crashes).
223
+
224
+ ---
225
+
226
+ ## Cross-Cutting Concerns
227
+
228
+ ### Code Style & Abstraction Philosophy
229
+
230
+ OOP for framework integration (Environment, EnvClient subclasses), plain methods for logic. Extract helpers when they clarify intent, not for DRY.
231
+
232
+ - **Structure:** Flat package root with `server/` for server-only code
233
+ - **Error handling:** Graceful fallbacks (never crash), `ValueError` for invalid inputs
234
+ - **Imports:** `try: from sql_env.X / except: from X` for dual install/Docker compatibility
235
+
236
+ ### Tokenization
237
+
238
+ Two paths, same interface (`apply_chat_template`):
239
+
240
+ | Mode | Tokenizer | Source | When |
241
+ |------|-----------|--------|------|
242
+ | Dev/Test | `MockTokenizer` | `server/test_sql_env.py` | No GPU, no downloads |
243
+ | Production | HuggingFace | `transformers` library | Real RL training |
244
+
245
+ `MockTokenizer` encodes as `ord(c)` per character, decodes as `chr(t)`. Deterministic and fast.
246
+
247
+ ### Configuration
248
+
249
+ | Variable | Required | Description | Default |
250
+ |----------|----------|-------------|---------|
251
+ | `OLLAMA_MODEL` | No | Ollama model name for SQL generation | `qwen2` |
252
+ | `OLLAMA_BASE_URL` | No | Ollama API endpoint | `http://localhost:11434` |
253
+
254
+ ---
255
+
256
+ ## Data, State, and Storage Locations
257
+
258
+ - **Repo (committed):**
259
+ - `data/questions/student_assessment.json` — 53 Spider Q&A pairs
260
+ - `data/databases/models.py` — 9 SQLAlchemy ORM table definitions
261
+ - **Runtime state (in-memory, per episode):**
262
+ - `SQLState.history_messages` — conversation messages
263
+ - `SQLState.history_tokens` — tensor per turn
264
+ - **Not yet implemented:**
265
+ - SQLite database files (Phase 3 — queries currently go through Ollama, not executed locally)
266
+ - Reward/verification state
267
+
268
+ ---
269
+
270
+ ## Invariants and Guardrails
271
+
272
+ - `self.db_models` refers to **database table** models (SQLAlchemy), never RL models
273
+ - Token tensors grow monotonically across turns (never shrink or reset mid-episode)
274
+ - `message_to_action()` mutates state — it appends to history before tokenizing
275
+ - Ollama failures never crash the environment — always graceful fallback
276
+ - `tests/test_smoke.py` must pass without Ollama, without GPU, without network
277
+ - Schema column names in `_build_schema_description()` must match `data/databases/models.py`
278
+
279
+ ---
280
+
281
+ ## Glossary
282
+
283
+ | Term | Definition |
284
+ |------|------------|
285
+ | Episode | One question-answering session: reset -> N steps -> terminal |
286
+ | Action type | One of: describe, sample, query, answer |
287
+ | MockTokenizer | Deterministic char-code tokenizer for dev/test (no GPU) |
288
+ | Spider | Academic text-to-SQL benchmark dataset |
289
+ | ORM models | SQLAlchemy class definitions in `data/databases/models.py` |
290
+ | OpenEnv | Meta's RL environment framework (Environment, EnvClient, Action, Observation) |
291
+
292
+ ---
293
+
294
+ ## Infrastructure
295
+
296
+ ### Development
297
+
298
+ **Prerequisites:**
299
+ - Python 3.11-3.12 (torch incompatible with 3.13)
300
+ - `uv` package manager
301
+ - Ollama (optional)
302
+
303
+ **Setup:**
304
+ ```bash
305
+ git clone <repo-url> && cd sql-env
306
+ uv sync
307
+ uv run pytest tests/ -v # 21 tests, ~3.5s, no external deps
308
+ ```
309
+
310
+ ### Production
311
+
312
+ **Deployment:** Docker container via OpenEnv CLI (`openenv build` / `openenv push`)
313
+ **Runtime:** FastAPI on port 8000 (defined in `openenv.yaml`)
314
+ **Status:** Dockerfile is a scaffold stub — not yet validated
315
+
316
+ ---
317
+
318
+ ## Suggested Feature Breakdown
319
+
320
+ | ID | Feature | Complexity | Dependencies | Notes |
321
+ |----|---------|------------|--------------|-------|
322
+ | F001 | SQL query execution | standard | - | Execute queries against real SQLite, return results |
323
+ | F002 | Reward computation | standard | F001 | 3-layer reward: operational, progress, terminal |
324
+ | F003 | Answer verification | standard | F001 | Compare agent answer to gold SQL results |
325
+ | F004 | Docker validation | simple | - | Update Dockerfile, test `openenv build` |
326
+ | F005 | Multi-database support | complex | F001 | Load any Spider database, not just student_assessment |
327
+
328
+ ### Suggested Implementation Order
329
+
330
+ 1. **F001** — Foundation: wire up SQLite execution so queries return real data
331
+ 2. **F002 + F003** — Can be done in parallel once F001 is complete
332
+ 3. **F004** — Independent, can be done anytime
333
+ 4. **F005** — After the single-database path is solid
334
+
335
+ ---
336
+
337
+ ## Future Considerations
338
+
339
+ - **Real SQLite execution:** Queries currently go to Ollama for SQL generation but aren't executed against a database. Phase 3 should execute the generated SQL and return actual results.
340
+ - **Multi-episode batching:** For RL training, the environment will need to support multiple concurrent episodes efficiently.
341
+ - **Reward shaping:** The 3-layer reward (operational, progress, terminal) is designed in `models.py` but not implemented.
342
+ - **Table selection without Ollama:** A lightweight keyword/embedding-based table selector could replace the LLM fallback.
343
+
344
+ ---
345
+
346
+ ## Keeping This Map Current
347
+
348
+ Update this file when you change any of:
349
+ - System boundaries (new service, new subsystem)
350
+ - Persistent state locations (new files/dirs written or read)
351
+ - Shared data models or API contracts
352
+ - Cross-cutting invariants
353
+
354
+ ---
355
+
356
+ ## References
357
+
358
+ - Docs index: `docs/README.md`
359
+ - Operations: `docs/RUNBOOK.md`
360
+ - OpenEnv framework: https://github.com/meta-pytorch/OpenEnv
361
+ - Spider dataset: https://huggingface.co/datasets/xlangai/spider
docs/README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docs
2
+
3
+ This directory is the system-of-record for durable project knowledge.
4
+
5
+ ## Quick Links
6
+
7
+ | Category | Index | Type | Purpose |
8
+ |----------|-------|------|---------|
9
+ | **Guides** | [guides/README.md](guides/README.md) | how-to | Practical step-by-step procedures |
10
+ | **Design** | [design-docs/index.md](design-docs/index.md) | explanation | Feature design, ADRs, decision rationale |
11
+ | **ADR Template** | [design-docs/decisions/0001-template.md](design-docs/decisions/0001-template.md) | reference | Decision record template |
12
+ | **References** | [references/README.md](references/README.md) | reference | External docs for agent context |
13
+
14
+ ## System Docs
15
+
16
+ - Architecture: [ARCHITECTURE.md](ARCHITECTURE.md)
17
+ - Operations: [RUNBOOK.md](RUNBOOK.md)
18
+
19
+ ## Directory Structure
20
+
21
+ ```
22
+ docs/
23
+ ├── README.md # This file (index)
24
+ ├── ARCHITECTURE.md # System design overview [reference]
25
+ ├── RUNBOOK.md # Operations guide [how-to]
26
+ ├── guides/ # How-to guides [how-to]
27
+ │ └── README.md # Guide index
28
+ ├── design-docs/ # Decision rationale [explanation]
29
+ │ ├── index.md # Design docs catalogue
30
+ │ └── decisions/ # Architectural Decision Records
31
+ └── references/ # External docs [reference]
32
+ └── README.md # External docs for agent context
33
+ ```
34
+
35
+ ## Adding Documentation
36
+
37
+ | If you need... | Create in... | Type |
38
+ |----------------|--------------|------|
39
+ | Step-by-step procedure | `docs/guides/<topic>.md` | how-to |
40
+ | Design for a feature | `docs/design-docs/<feature>.md` | explanation |
41
+ | External library docs | `docs/references/<library>-llms.txt` | reference |
docs/RUNBOOK.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Runbook
2
+
3
+ Operational notes: how to run, test, and debug day-to-day.
4
+
5
+ ## Common Commands
6
+
7
+ ```bash
8
+ # Run tests (project package manager)
9
+ uv run pytest tests/ -v
10
+ ```
docs/blog-outline.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SQLEnv Blog Post Outline
2
+
3
+ ## 1) Hook: Teaching AI to Think Like a Data Analyst
4
+
5
+ - Open with a concrete moment: an agent sees a new schema and must reason through uncertainty instead of guessing one SQL query.
6
+ - Frame the core idea: SQL competence is not only syntax generation; it is iterative investigation with feedback.
7
+ - Position SQLEnv as a training ground where agents learn exploration habits that mirror analyst workflows.
8
+
9
+ ## 2) The Problem: Static Benchmarks Reward Memorization
10
+
11
+ - Explain why single-shot text-to-SQL can hide brittle behavior when schemas, table names, or data distributions shift.
12
+ - Show that leaderboard accuracy does not guarantee robust reasoning on unfamiliar databases.
13
+ - Describe the gap: most benchmarks grade final answers but ignore how the model arrived there.
14
+ - Tie this directly to user pain: correct-looking SQL can fail in real environments where context changes every session.
15
+
16
+ ## 3) Our Approach: SQLEnv as an Interactive RL Environment
17
+
18
+ - Introduce the action loop: `DESCRIBE`, `SAMPLE`, `QUERY`, and `ANSWER` as the minimum interface for grounded exploration.
19
+ - Explain that each episode starts with a natural-language question and a hidden schema to force discovery.
20
+ - Highlight OpenEnv compatibility so the environment can run with standard training tooling and deployment flows.
21
+
22
+ ## 4) How SQLEnv Works End-to-End
23
+
24
+ - Walk through one episode narrative: inspect table shapes, sample data, run targeted joins, then submit an answer.
25
+ - Summarize reward design in plain language: reward reliable execution, reward progress toward the goal, and strongly reward final correctness.
26
+ - Note guardrails: read-only SQL execution, query timeout, and clear error messages to prevent unsafe or confusing behavior.
27
+
28
+ ## 5) Training with GRPO
29
+
30
+ - Briefly explain GRPO as a practical policy optimization method for improving multi-step tool use behavior.
31
+ - Connect training signals to environment telemetry: each step gives usable feedback rather than waiting for terminal reward only.
32
+ - Clarify expected outcome: strategic behavior should improve over random baselines even with modest compute.
33
+
34
+ ## 6) Results
35
+
36
+ - [PLACEHOLDER: Insert F006 metrics for success rate, average reward, and episode efficiency.]
37
+ - Compare random baseline, trained policy, and oracle policy to show both practical gains and theoretical ceiling.
38
+ - Include one short failure case to show where the policy still struggles and why that insight is useful.
39
+
40
+ ## 7) Technical Highlights
41
+
42
+ - Multi-database Spider coverage with structured metadata and deterministic train/eval split.
43
+ - Typed action and observation models that make environment interactions explicit and debuggable.
44
+ - Deployment-ready packaging for HuggingFace Spaces with bundled databases and health checks.
45
+
46
+ ## 8) Try It Yourself
47
+
48
+ - HuggingFace Space: add live link and a one-line instruction for connecting and running a first episode.
49
+ - Colab notebook: link `notebooks/train_grpo.ipynb` with notes on expected runtime and CPU compatibility.
50
+ - GitHub repository: link setup steps, architecture docs, and verification artifacts for reproducibility.
51
+
52
+ ## 9) What We Learned
53
+
54
+ - Dense intermediate rewards improve learning speed only when they align with the final objective.
55
+ - Tool-using agents benefit from transparent errors; better diagnostics create better policy updates.
56
+ - Packaging and storytelling matter: a reproducible deployment and clear narrative are as important as benchmark numbers for adoption.
docs/design-docs/decisions/0001-template.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ADR 0001: <Title>
2
+
3
+ ## Status
4
+
5
+ - Proposed | Accepted | Rejected | Deprecated
6
+
7
+ ## Context
8
+
9
+ Describe the problem and constraints.
10
+
11
+ ## Decision
12
+
13
+ What we decided and why.
14
+
15
+ ## Consequences
16
+
17
+ What gets better, what gets worse, what we need to watch.
18
+
19
+ ## Alternatives Considered
20
+
21
+ List viable alternatives and why they were not chosen.
22
+
23
+ ## Links
24
+
25
+ - Related spec(s):
26
+ - Related PR(s):
docs/design-docs/index.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Design Docs
2
+
3
+ This directory contains design documentation for architectural decisions — the WHY behind technical choices.
4
+
5
+ ## Core Beliefs
6
+
7
+ See [core-beliefs.md](core-beliefs.md) for agent-first operating principles.
8
+
9
+ ## Decisions (ADRs)
10
+
11
+ Architectural Decision Records are stored in [decisions/](decisions/).
12
+
13
+ | ADR | Title | Status |
14
+ |-----|-------|--------|
15
+ | [0001](decisions/0001-template.md) | ADR Template | Template |
16
+
17
+ ## Feature Design Docs
18
+
19
+ | Feature | Status | Date | Reversibility |
20
+ |---------|--------|------|---------------|
21
+ | *None yet* | | | |
22
+
23
+ ## Creating Design Docs
24
+
25
+ Use the `design-doc` skill for structured decision documentation:
26
+
27
+ ```
28
+ skill({ name: "design-doc" })
29
+ ```
30
+
31
+ The skill guides you through:
32
+ 1. **Context** — What's the situation? What triggered this?
33
+ 2. **Decision Drivers** — Constraints, preferences, quality attributes
34
+ 3. **Options Analysis** — At least 2 options with pros/cons
35
+ 4. **Decision** — Choice + rationale + consequences + reversibility
36
+ 5. **Implementation Guidance** — Key interfaces, boundaries
37
+
38
+ ## When to Create a Design Doc
39
+
40
+ **CREATE when:**
41
+ - Making an architectural choice with multiple valid options
42
+ - Introducing a new pattern or abstraction
43
+ - Choosing between technologies, libraries, or approaches
44
+ - A decision will affect multiple features
45
+
46
+ **SKIP when:**
47
+ - Following an existing established pattern
48
+ - The decision is trivial or easily reversed
49
+ - A simple code comment would suffice
50
+
51
+ ## Integration with Autocode
52
+
53
+ The `autocode-implementation-planner` skill automatically reads linked design docs:
54
+ - Uses constraints as hard requirements
55
+ - Respects the chosen interfaces
56
+ - Stays within the defined boundaries
57
+ - Notes reversibility for future refactoring
docs/guides/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # How-To Guides
2
+
3
+ Practical, goal-oriented guides for getting things done. Each guide addresses a specific task or workflow.
4
+
5
+ **Diataxis type:** How-to (action + application of skill)
6
+
7
+ ## Index
8
+
9
+ | Guide | Goal |
10
+ |-------|------|
11
+ | *None yet* | |
12
+
13
+ ## What Goes Here
14
+
15
+ - Step-by-step instructions for achieving a specific goal
16
+ - Operational procedures (deploy, configure, troubleshoot)
17
+ - Workflow walkthroughs
18
+
19
+ ## What Does NOT Go Here
20
+
21
+ - Learning-oriented content (tutorials)
22
+ - Factual descriptions of APIs/interfaces (go to `docs/references/`)
23
+ - Decision rationale (go to `docs/design-docs/`)
24
+ - Exploratory notes (go to `docs/exploration/`)
docs/learnings/F007-architecture.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Runtime images for OpenEnv/HF deployments should copy both `.venv` and `data/databases` into `/app/env` so environment logic and SQLite assets ship together for executable episodes and health validation *(F007)*
docs/learnings/F007-conventions.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - Submission-facing notebooks must be Colab-ready by using relative project paths, cleared cell outputs, and a fixed section order (setup -> config -> connect -> train -> eval -> plot) to keep artifacts reproducible and reviewable *(F007)*
2
+ - README top sections should provide a three-command verification path (`uv sync`, `openenv validate`, `pytest`) before deep docs so judges can validate environment viability quickly *(F007)*
docs/learnings/F007-gotchas.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - Hardcoding port 8000 in container startup or health checks can cause false-negative readiness on HuggingFace Spaces where `PORT=7860` is injected at runtime *(F007)*
2
+ - API health checks can report green while episodes still fail unless probes also assert at least one bundled `*.sqlite` file exists under `data/databases` *(F007)*
docs/learnings/F007-integrations.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - HuggingFace Spaces deployment must treat `PORT` as runtime-configurable and wire both `HEALTHCHECK` and `uvicorn` startup to `${PORT:-8000}` for local/HF parity *(F007)*
2
+ - Training notebooks should include an explicit `SQLEnvClient` connect/reset/step smoke test before GRPO runs to fail fast when environment connectivity is broken *(F007)*
docs/learnings/F007-security.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Run deployment containers as a non-root user (for example uid 10001) after `chown -R /app` to meet least-privilege expectations without breaking runtime file access *(F007)*
docs/learnings/F007-testing.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Structural notebook rewrites should be guarded by a notebook-focused E2E suite plus full `tests/` regression to catch both training-flow and system-wide integration drift *(F007)*
docs/learnings/F007-workflow.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Feature finalization should run both targeted E2E checks and full regression, then sync completion metadata in IMPLEMENTATION_SPEC execution status and FEATURES.json progress fields *(F007)*
docs/references/README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # References
2
+
3
+ External references and pointers that inform decisions.
4
+
5
+ Add links here when they become useful across multiple features.
evaluation/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Public evaluation API for the green agent wrapper."""
2
+
3
+ from .green_agent import EpisodeResult, EvaluationResult, Policy, RandomPolicy, evaluate
4
+
5
+ __all__ = [
6
+ "Policy",
7
+ "RandomPolicy",
8
+ "EpisodeResult",
9
+ "EvaluationResult",
10
+ "evaluate",
11
+ ]
evaluation/green_agent.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Core types for policy evaluation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ import random
7
+ import re
8
+ from typing import Callable, Protocol, runtime_checkable
9
+
10
+ try:
11
+ from ..models import SQLAction, SQLObservation
12
+ except ImportError:
13
+ try:
14
+ from models import SQLAction, SQLObservation # type: ignore[no-redef]
15
+ except ImportError:
16
+ from sql_env.models import SQLAction, SQLObservation # type: ignore[no-redef]
17
+
18
+
19
@runtime_checkable
class Policy(Protocol):
    """Interface for policies used by the evaluator.

    Any object exposing a compatible ``select_action`` method satisfies this
    protocol; ``runtime_checkable`` additionally allows ``isinstance`` checks.
    """

    def select_action(self, observation: SQLObservation) -> SQLAction:
        """Choose one action for the current observation.

        Args:
            observation: The agent-visible environment state for this step.

        Returns:
            The next action to send to the environment.
        """
25
+
26
+
27
@dataclass(frozen=True)
class EpisodeResult:
    """Per-episode metrics from one evaluation run."""

    episode_index: int  # Zero-based position of the episode within the run.
    correct: bool  # True when the episode's final reward was positive.
    total_reward: float  # Sum of per-step rewards over the whole episode.
    steps: int  # Number of env.step() calls taken before termination.
    error: str | None = None  # Exception text if the episode crashed, else None.
36
+
37
+
38
@dataclass(frozen=True)
class EvaluationResult:
    """Aggregate evaluation metrics across all attempted episodes.

    Aggregates (``success_rate``, ``avg_reward``, ``avg_steps``) are computed
    over completed episodes only; crashed episodes are excluded from the
    averages but still counted in ``n_episodes`` and listed in ``episodes``.
    """

    success_rate: float  # Fraction of completed episodes with positive final reward.
    avg_reward: float  # Mean total reward across completed episodes.
    avg_steps: float  # Mean step count across completed episodes.
    n_episodes: int  # Episodes attempted, including crashed ones.
    n_completed: int  # Episodes that finished without raising.
    episodes: list[EpisodeResult]  # Per-episode records, failures included.
48
+
49
+
50
+ class RandomPolicy:
51
+ """Built-in random baseline policy."""
52
+
53
+ _EXPLORATION_ACTIONS = ("DESCRIBE", "SAMPLE", "QUERY")
54
+ _ROW_PATTERN = re.compile(r"^\d+\.\s*(.+)$")
55
+
56
+ def __init__(self, seed: int | None = None) -> None:
57
+ self._rng = random.Random(seed)
58
+
59
+ def select_action(self, observation: SQLObservation) -> SQLAction:
60
+ if observation.budget_remaining <= 1:
61
+ return SQLAction(
62
+ action_type="ANSWER",
63
+ argument=self._random_answer(observation.result),
64
+ )
65
+
66
+ action_type = self._rng.choice(self._EXPLORATION_ACTIONS)
67
+ table_name = self._random_table(observation.schema_info)
68
+ if action_type == "QUERY":
69
+ safe_table_name = table_name.replace('"', '""')
70
+ argument = f'SELECT * FROM "{safe_table_name}" LIMIT 5'
71
+ else:
72
+ argument = table_name
73
+
74
+ return SQLAction(action_type=action_type, argument=argument)
75
+
76
+ def _random_table(self, schema_info: str) -> str:
77
+ table_names = self._extract_table_names(schema_info)
78
+ if not table_names:
79
+ return "unknown"
80
+ return self._rng.choice(table_names)
81
+
82
+ @classmethod
83
+ def _extract_table_names(cls, schema_info: str) -> list[str]:
84
+ table_names: list[str] = []
85
+ for line in schema_info.splitlines():
86
+ stripped = line.strip()
87
+ if not stripped.startswith("- "):
88
+ continue
89
+ candidate = stripped[2:]
90
+ if ":" in candidate:
91
+ candidate = candidate.split(":", maxsplit=1)[0]
92
+ candidate = candidate.strip()
93
+ if candidate:
94
+ table_names.append(candidate)
95
+ return table_names
96
+
97
+ def _random_answer(self, result_text: str) -> str:
98
+ candidates = self._extract_answer_candidates(result_text)
99
+ if not candidates:
100
+ return "unknown"
101
+ return self._rng.choice(candidates)
102
+
103
+ @classmethod
104
+ def _extract_answer_candidates(cls, result_text: str) -> list[str]:
105
+ candidates: list[str] = []
106
+ for line in result_text.splitlines():
107
+ match = cls._ROW_PATTERN.match(line.strip())
108
+ if not match:
109
+ continue
110
+ row_value = match.group(1).strip()
111
+ if not row_value:
112
+ continue
113
+ candidates.append(row_value)
114
+ split_values = [value.strip() for value in row_value.split("|")]
115
+ candidates.extend([value for value in split_values if value])
116
+ return candidates
117
+
118
+
119
def evaluate(
    env: object,
    policy: Policy,
    n_episodes: int = 100,
    *,
    seed: int | None = None,
    progress_callback: Callable[[int, int], None] | None = None,
) -> EvaluationResult:
    """Run ``policy`` against ``env`` for ``n_episodes`` with error isolation.

    Each episode is reset (with ``seed + index`` when a seed is given) and
    stepped until the environment reports done. A raising episode is recorded
    as a failed :class:`EpisodeResult` instead of aborting the run. Aggregate
    metrics are computed over completed episodes only.

    Args:
        env: Environment exposing ``reset(seed=...)`` and ``step(action)``.
        policy: Action selector satisfying the :class:`Policy` protocol.
        n_episodes: Number of episodes to attempt (must be >= 0).
        seed: Optional base seed; episode ``i`` uses ``seed + i``.
        progress_callback: Optional ``(finished, total)`` hook, called after
            every episode, including crashed ones.

    Returns:
        An :class:`EvaluationResult` summarizing the run.

    Raises:
        ValueError: If ``n_episodes`` is negative.
    """
    if n_episodes < 0:
        raise ValueError("n_episodes must be >= 0")

    results: list[EpisodeResult] = []
    for index in range(n_episodes):
        try:
            reset_seed = None if seed is None else seed + index
            obs = env.reset(seed=reset_seed)
            reward_sum = 0.0
            step_total = 0
            while not obs.done:
                obs = env.step(policy.select_action(obs))
                reward_sum += obs.reward or 0.0
                step_total += 1
            # Correctness is judged by the FINAL step's reward being positive.
            results.append(
                EpisodeResult(
                    episode_index=index,
                    correct=(obs.reward or 0.0) > 0.0,
                    total_reward=reward_sum,
                    steps=step_total,
                )
            )
        except Exception as exc:
            # Isolate per-episode failures so one bad episode cannot sink
            # the whole evaluation run.
            results.append(
                EpisodeResult(
                    episode_index=index,
                    correct=False,
                    total_reward=0.0,
                    steps=0,
                    error=str(exc),
                )
            )

        if progress_callback is not None:
            progress_callback(index + 1, n_episodes)

    # Aggregates are computed over completed (non-crashed) episodes only.
    # This also covers n_episodes == 0, which yields all-zero aggregates.
    completed = [ep for ep in results if ep.error is None]
    if not completed:
        return EvaluationResult(
            success_rate=0.0,
            avg_reward=0.0,
            avg_steps=0.0,
            n_episodes=n_episodes,
            n_completed=0,
            episodes=results,
        )

    count = len(completed)
    wins = sum(1 for ep in completed if ep.correct)
    return EvaluationResult(
        success_rate=wins / count,
        avg_reward=sum(ep.total_reward for ep in completed) / count,
        avg_steps=sum(ep.steps for ep in completed) / count,
        n_episodes=n_episodes,
        n_completed=count,
        episodes=results,
    )
+ )
models.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLEnv Pydantic models — the data contracts between client and server.
3
+
4
+ These models define the typed interface for the SQLEnv RL environment,
5
+ following the OpenEnv pattern (see OpenEnv Tutorial for reference):
6
+
7
+ Action — what the agent sends each step
8
+ Observation — what the agent receives back
9
+ State — episode metadata (exposed via the state endpoint)
10
+
11
+ RL terminology — state vs observation
12
+ ─────────────────────────────────────
13
+ In RL theory:
14
+
15
+ State (s) A COMPLETE description of the world. Nothing is hidden.
16
+ Observation (o) A PARTIAL description of a state, which may omit info.
17
+
18
+ In SQLEnv these map to:
19
+
20
+ EpisodeContext The full RL state (s). Lives on the server only.
21
+ Contains gold answers, reward accumulators, DB
22
+ connection, full query history — everything needed
23
+ to advance the simulation and compute rewards.
24
+
25
+ SQLObservation The observation (o). Sent to the agent over the wire.
26
+ Contains the question, truncated results, revealed
27
+ schema, budget, and action history. The agent NEVER
28
+ sees the gold answer, progress scores, or full DB.
29
+
30
+ SQLState OpenEnv's "State" base class — lightweight episode
31
+ metadata (episode_id, step_count). This is NOT the
32
+ RL state; it is a convenience for logging/debugging.
33
+
34
+ This separation is what makes SQLEnv a POMDP: the agent must act under
35
+ uncertainty, which is what makes exploration necessary and learnable.
36
+ """
37
+
38
+ import sqlite3
39
+ from dataclasses import dataclass, field as dataclass_field
40
+
41
+ from openenv.core.env_server.interfaces import Message
42
+ from openenv.core.env_server.types import Action, Observation, State
43
+ from pydantic import Field
44
+ import torch
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Wire types: these cross the HTTP boundary between client and server
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
class SQLAction(Action):
    """What the agent sends each step.

    The action space is intentionally small and structured so agents can
    explicitly control the environment loop.
    """

    # NOTE(review): action_type is a free-form str rather than a
    # Literal/enum; invalid values are presumably rejected server-side —
    # confirm against the environment's step handler.
    action_type: str = Field(
        ...,
        description="One of: DESCRIBE, SAMPLE, QUERY, ANSWER",
    )
    # Meaning depends on action_type: table name, SQL string, or answer value.
    argument: str = Field(
        ...,
        description=(
            "Table name (DESCRIBE/SAMPLE), SQL string (QUERY), "
            "or answer value (ANSWER)."
        ),
    )
69
+
70
+
71
class SQLObservation(Observation):
    """What the agent receives after each step.

    This is the agent's PARTIAL view of the world. Key design choices:

    - schema_info starts with table names only; columns are revealed
      incrementally as the agent DESCRIBEs tables.
    - result is always a truncated string, never raw data. The agent sees
      what a human analyst would see in a terminal — at most N rows of
      formatted text. This keeps the observation bounded and forces the
      agent to reason about what it sees rather than brute-force scanning.
    - action_history gives the agent memory of its own trajectory without
      the server needing to re-send full results from prior steps.
    """

    # Inherited from Observation: done (bool), reward (float | None)
    question: str = Field(..., description="The NL question to answer")
    schema_info: str = Field(..., description="Known schema information")
    # result/error default to "" so the first observation after reset is valid.
    result: str = Field(default="", description="Result of the last action")
    error: str = Field(default="", description="Error message if action failed")
    step_count: int = Field(default=0, description="Current step number")
    budget_remaining: int = Field(default=0, description="Steps remaining")
    action_history: list[str] = Field(
        default_factory=list,
        description="Summary of previous actions",
    )
97
+
98
+
99
class SQLState(State):
    """Episode metadata exposed via GET /state.

    This is the minimal public state — enough for logging and debugging,
    but NOT the full internal bookkeeping (see EpisodeContext below).
    """

    # # Inherited from State: episode_id (str | None), step_count (int)
    # game_name: str = Field(
    #     "sql_env", description="Name of the game/environment"
    # )
    # Per-step chat history; index-aligned with history_tokens.
    history_messages: list[Message] = Field(default_factory=list)
    # NOTE(review): torch.Tensor is not a native pydantic type; this field
    # presumably relies on arbitrary-type support in the State base model
    # and will not JSON-serialize as-is — confirm.
    history_tokens: list[torch.Tensor] = Field(
        default_factory=list
    )  # Same len as messages
    current_action_type: str = Field(
        default="QUERY",
        description="Current action type: DESCRIBE, SAMPLE, QUERY, or ANSWER",
    )
118
+
119
+
120
@dataclass
class QuestionRecord:
    """One question from the Spider dataset.

    The gold fields (gold_sql, gold_answer) are server-side reference data
    and are never revealed to the agent.
    """

    question_id: str  # Stable identifier, e.g. "spider_dev_042".
    question_text: str  # The natural-language question posed to the agent.
    database_name: str  # Which SQLite database this question runs against.
    gold_sql: str  # Reference SQL (hidden from the agent).
    gold_answer: str  # Expected answer (hidden from the agent).
    answer_type: str  # e.g. integer, float, string, list, table.
    difficulty: str  # e.g. easy, medium, hard.
    tables_involved: list[str]  # Tables the gold query touches.
132
+
133
+
134
@dataclass
class EpisodeContext:
    """Per-episode server-side state (never sent to agent).

    Holds everything the server needs to execute actions, compute rewards,
    and build the next observation. See the design outline comments below
    for the intended semantics of each field.
    """

    episode_id: str  # Unique identifier for this episode.
    db_connection: sqlite3.Connection  # Connection to the episode's SQLite DB.
    question_record: QuestionRecord  # Selected question plus gold metadata.
    step_count: int = 0  # Steps taken so far (0 at reset).
    budget: int = 15  # Steps remaining; episode ends when exhausted.
    described_tables: set[str] = dataclass_field(default_factory=set)  # Tables already DESCRIBEd.
    action_log: list[str] = dataclass_field(default_factory=list)  # Human-readable action summaries.
    done: bool = False  # Whether the episode has terminated.
    gold_answer: str | None = None  # Cached gold answer, if precomputed.
    gold_rows: list[tuple] = dataclass_field(default_factory=list)  # Gold SQL result rows for progress scoring.
    query_hashes: set[str] = dataclass_field(default_factory=set)  # Hashes of executed queries, for repeat detection.
    best_progress: float = 0.0  # Best binned progress score achieved so far.
    cumulative_step_reward: float = 0.0  # Running sum of all per-step rewards.
    cumulative_new_info_reward: float = 0.0  # Running r_new_info total (capped per episode).
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Conceptual internal state: what the server tracks per episode
156
+ # ---------------------------------------------------------------------------
157
+ #
158
+ # The classes below are a DESIGN OUTLINE, not runnable implementation.
159
+ # They describe the information the server needs to maintain during an
160
+ # episode so that it can:
161
+ #
162
+ # 1. Execute actions against the database
163
+ # 2. Compute the 3-layer reward signal
164
+ # 3. Enforce budget limits and anti-gaming measures
165
+ # 4. Build the next observation for the agent
166
+ #
167
+ # These are SERVER-ONLY — they never cross the HTTP boundary.
168
+ # Implementation will follow in server/environment.py during Phase 2.
169
+ #
170
+ #
171
+ # EpisodeContext — Per-episode server state
172
+ # ──────────────────────────────────────────
173
+ # Conceptual fields:
174
+ #
175
+ # episode_id: str
176
+ # Unique identifier for this episode (UUID).
177
+ #
178
+ # question_record: QuestionRecord
179
+ # The selected question and its metadata:
180
+ # - question_id, question_text, database_name
181
+ # - gold_sql, gold_answer, answer_type, difficulty
182
+ # Loaded from the question set JSON at reset().
183
+ #
184
+ # db_connection: sqlite3.Connection
185
+ # Read-only connection to the episode's SQLite database.
186
+ # Opened at reset(), closed when the episode ends.
187
+ # Enforces: read-only mode, statement timeout (5s), SELECT-only.
188
+ #
189
+ # step_count: int
190
+ # Current step number (0 at reset, incremented each step()).
191
+ #
192
+ # budget: int
193
+ # Steps remaining. Starts at max_steps (default 15).
194
+ # Decremented on each non-ANSWER action. Episode terminates
195
+ # when budget hits 0 without an ANSWER.
196
+ #
197
+ # --- Schema tracking (for observation building) ---
198
+ #
199
+ # known_tables: set[str]
200
+ # Table names revealed to the agent. Starts with ALL table names
201
+ # (agent sees table names at reset), but column details are hidden.
202
+ #
203
+ # described_tables: dict[str, list[ColumnInfo]]
204
+ # Tables the agent has DESCRIBEd → their column info.
205
+ # Used to build the incrementally-revealed schema_info string.
206
+ #
207
+ # --- Reward tracking (Layer 1: Operational) ---
208
+ #
209
+ # query_hashes: set[str]
210
+ # Hashes of all SQL queries executed this episode.
211
+ # Used for repeat detection (r_repeat penalty).
212
+ #
213
+ # explored_entities: set[str]
214
+ # Set of "table.column" strings the agent has discovered.
215
+ # Used for r_new_info reward. Capped at 0.10 total per episode.
216
+ #
217
+ # cumulative_new_info_reward: float
218
+ # Running total of r_new_info awarded. Once this reaches the cap
219
+ # (0.10), no more r_new_info is given.
220
+ #
221
+ # --- Reward tracking (Layer 2: Progress) ---
222
+ #
223
+ # gold_result: Any
224
+ # The result of running gold_sql on the database, computed once
225
+ # at reset(). This is the reference for progress comparison.
226
+ #
227
+ # best_progress: float
228
+ # Best binned progress score achieved so far (one of
229
+ # {0, 0.25, 0.5, 0.75, 1.0}). Reward is given only when
230
+ # a QUERY result IMPROVES over this value.
231
+ #
232
+ # --- Reward tracking (aggregates) ---
233
+ #
234
+ # cumulative_step_reward: float
235
+ # Running sum of all per-step rewards (Layers 1 + 2).
236
+ # Clamped to [-0.2, +0.5] at episode end.
237
+ #
238
+ # --- Action history (for observation) ---
239
+ #
240
+ # action_log: list[str]
241
+ # Human-readable summaries of each action taken, e.g.:
242
+ # "DESCRIBE employees → 5 columns"
243
+ # "QUERY: SELECT COUNT(*) FROM orders → 42"
244
+ # "ANSWER: 42 → correct"
245
+ # Sent to the agent in SQLObservation.action_history so it has
246
+ # memory of its own trajectory.
247
+ #
248
+ #
249
+ # QuestionRecord — Metadata for a single question
250
+ # ─────────────────────────────────────────────────
251
+ # Conceptual fields:
252
+ #
253
+ # question_id: str e.g. "spider_dev_042"
254
+ # question_text: str The natural language question
255
+ # database_name: str Which SQLite database to load
256
+ # gold_sql: str Reference SQL (hidden from agent)
257
+ # gold_answer: str Expected answer (hidden from agent)
258
+ # answer_type: str One of: integer, float, string, list, table
259
+ # difficulty: str One of: easy, medium, hard
260
+ # tables_involved: list[str] Which tables the gold query touches
261
+ #
262
+ #
263
+ # ColumnInfo — Schema detail for a single column
264
+ # ───────────────────────────────────────────────
265
+ # Conceptual fields:
266
+ #
267
+ # name: str Column name
268
+ # dtype: str SQLite type (TEXT, INTEGER, REAL, etc.)
269
+ # is_primary_key: bool Whether this is a PK
270
+ # is_foreign_key: bool Whether this is a FK
271
+ # references: str | None "table.column" if FK, else None
272
+ #
notebooks/train_grpo.ipynb ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Training a SQL Agent with GRPO + SQLEnv\n",
8
+ "\n",
9
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/)\n",
10
+ "\n",
11
+ "This notebook is a Colab-ready walkthrough for training an agent against SQLEnv. It follows setup, configuration, connectivity check, training, evaluation, and plotting in one linear flow."
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "markdown",
16
+ "metadata": {},
17
+ "source": [
18
+ "## 1) Setup\n",
19
+ "Install dependencies and (optionally) clone the repository when running in a fresh Colab runtime."
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "%pip install -q \"trl>=0.9.0\" \"transformers>=4.46.0\" \"datasets>=3.0.0\" \"matplotlib>=3.8.0\" \"openenv>=0.1.9\" \"websockets>=15.0.1\"\n",
29
+ "\n",
30
+ "# Optional in Colab if project files are not already present:\n",
31
+ "# !git clone https://github.com/<your-org>/<your-repo>.git\n",
32
+ "# %cd <your-repo>"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": [
39
+ "## 2) Configuration\n",
40
+ "Set environment URL, model, and core training hyperparameters."
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "from __future__ import annotations\n",
50
+ "\n",
51
+ "import matplotlib.pyplot as plt\n",
52
+ "\n",
53
+ "from sql_env.client import SQLEnvClient\n",
54
+ "from sql_env.training.config import GRPOConfig\n",
55
+ "from sql_env.training.data_loading import load_model_and_tokenizer, load_question_prompts\n",
56
+ "from sql_env.training.notebook_pipeline import build_trainer, run_training_with_metrics, sample_random_baseline\n",
57
+ "from sql_env.training.rewards import reward_correctness, reward_operational, reward_progress\n",
58
+ "\n",
59
+ "try:\n",
60
+ " from trl import GRPOConfig as TRLGRPOConfig\n",
61
+ " from trl import GRPOTrainer\n",
62
+ "except Exception as exc:\n",
63
+ " raise RuntimeError(\n",
64
+ " \"TRL is required for this notebook. Install dependencies in the Setup cell first.\"\n",
65
+ " ) from exc\n",
66
+ "\n",
67
+ "SPACE_URL = \"ws://localhost:8000/ws\"\n",
68
+ "MODEL_NAME = \"Qwen/Qwen3-0.6B\"\n",
69
+ "\n",
70
+ "# TODO: update after F006 if artifact paths or defaults change.\n",
71
+ "config = GRPOConfig(\n",
72
+ " questions_path=\"data/questions/questions_train.json\",\n",
73
+ " db_dir=\"data/databases\",\n",
74
+ " output_dir=\"outputs/grpo_run\",\n",
75
+ " model_name=MODEL_NAME,\n",
76
+ " num_train_epochs=1,\n",
77
+ " per_device_train_batch_size=1,\n",
78
+ " gradient_accumulation_steps=1,\n",
79
+ " num_generations=2,\n",
80
+ " step_budget=10,\n",
81
+ ")"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "markdown",
86
+ "metadata": {},
87
+ "source": [
88
+ "## 3) Connect and Smoke Test\n",
89
+ "Confirm the environment is reachable and can execute a short episode."
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "client = SQLEnvClient(base_url=SPACE_URL)\n",
99
+ "client.connect()\n",
100
+ "obs = client.reset(seed=42)\n",
101
+ "print(\"Question:\", obs.question)\n",
102
+ "\n",
103
+ "_ = client.step(\"DESCRIBE student\")\n",
104
+ "_ = client.step(\"SAMPLE student\")\n",
105
+ "\n",
106
+ "client.close()"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "metadata": {},
112
+ "source": [
113
+ "## 4) Train with GRPO\n",
114
+ "Build a trainer and run a short training pass."
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "metadata": {},
121
+ "outputs": [],
122
+ "source": [
123
+ "model, tokenizer = load_model_and_tokenizer(config.model_name)\n",
124
+ "prompts = load_question_prompts(config.questions_path, config.difficulty_filter)\n",
125
+ "\n",
126
+ "before_rollouts = sample_random_baseline([item[\"prompt\"] for item in prompts[:8]], step_budget=config.step_budget, seed=config.seed)\n",
127
+ "\n",
128
+ "reward_funcs = [reward_correctness, reward_progress, reward_operational]\n",
129
+ "trainer = build_trainer(\n",
130
+ " trl_grpo_config_cls=TRLGRPOConfig,\n",
131
+ " grpo_trainer_cls=GRPOTrainer,\n",
132
+ " model=model,\n",
133
+ " tokenizer=tokenizer,\n",
134
+ " prompts=prompts,\n",
135
+ " config=config,\n",
136
+ " reward_funcs=reward_funcs,\n",
137
+ ")\n",
138
+ "\n",
139
+ "# TODO: update after F006 if training entry points are renamed.\n",
140
+ "train_output, steps, rewards = run_training_with_metrics(trainer)\n",
141
+ "print(train_output)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "markdown",
146
+ "metadata": {},
147
+ "source": [
148
+ "## 5) Evaluate\n",
149
+ "Run a quick held-out evaluation summary after training."
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "metadata": {},
156
+ "outputs": [],
157
+ "source": [
158
+ "held_out_prompts = [item[\"prompt\"] for item in load_question_prompts(\"data/questions/questions_eval.json\", None)[:16]]\n",
159
+ "after_rollouts = sample_random_baseline(held_out_prompts, step_budget=config.step_budget, seed=config.seed + 1)\n",
160
+ "\n",
161
+ "baseline_avg_steps = sum(len(item[\"completion\"].splitlines()) for item in before_rollouts) / max(1, len(before_rollouts))\n",
162
+ "eval_avg_steps = sum(len(item[\"completion\"].splitlines()) for item in after_rollouts) / max(1, len(after_rollouts))\n",
163
+ "\n",
164
+ "print({\n",
165
+ " \"baseline_avg_steps\": round(baseline_avg_steps, 2),\n",
166
+ " \"held_out_avg_steps\": round(eval_avg_steps, 2),\n",
167
+ " \"eval_count\": len(after_rollouts),\n",
168
+ "})"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "markdown",
173
+ "metadata": {},
174
+ "source": [
175
+ "## 6) Plot Results\n",
176
+ "Visualize the reward trend collected during training."
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": null,
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "if steps and rewards:\n",
186
+ " plt.figure(figsize=(8, 4))\n",
187
+ " plt.plot(steps, rewards, marker=\"o\", linewidth=1.5)\n",
188
+ " plt.title(\"GRPO Reward Trend\")\n",
189
+ " plt.xlabel(\"Training Step\")\n",
190
+ " plt.ylabel(\"Reward\")\n",
191
+ " plt.grid(alpha=0.3)\n",
192
+ " plt.show()\n",
193
+ "else:\n",
194
+ " print(\"No reward points available yet.\")"
195
+ ]
196
+ },
197
+ {
198
+ "cell_type": "markdown",
199
+ "metadata": {},
200
+ "source": [
201
+ "## Next Steps\n",
202
+ "- Full training workflow: `specs/F006-IMPLEMENTATION_SPEC.md`\n",
203
+ "- Deployment package: `specs/F007-IMPLEMENTATION_SPEC.md`\n",
204
+ "- Live environment endpoint: replace `SPACE_URL` with your HF Space WebSocket URL\n",
205
+ "- Blog narrative source: `docs/blog-outline.md`"
206
+ ]
207
+ }
208
+ ],
209
+ "metadata": {
210
+ "colab": {
211
+ "name": "train_grpo.ipynb",
212
+ "provenance": []
213
+ },
214
+ "kernelspec": {
215
+ "display_name": "Python 3",
216
+ "language": "python",
217
+ "name": "python3"
218
+ },
219
+ "language_info": {
220
+ "name": "python",
221
+ "version": "3.12"
222
+ }
223
+ },
224
+ "nbformat": 4,
225
+ "nbformat_minor": 5
226
+ }
opencode.jsonc ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://opencode.ai/config.json",
3
+ // ============================================================================
4
+ // FULLSTACK AUTOCODE TEMPLATE
5
+ // ============================================================================
6
+ // For: FastAPI + Next.js projects with autonomous autocode workflow
7
+ // Copy to project root: cp ~/.config/opencode/templates/fullstack-autocode.jsonc ./opencode.jsonc
8
+ //
9
+ // This template is PERMISSIVE because verification comes from:
10
+ // - VERIFICATION_SPEC.md (independent test criteria)
11
+ // - review-modern subagent (auto-fix + bounded iteration)
12
+ // - git history (atomic commits per step)
13
+ //
14
+ // NOT from permission prompts.
15
+ //
16
+ // For headless/CLI automation (ralph-loop.sh, opencode run), all tools that
17
+ // might prompt must be pre-approved. See docs/opencode-server-mode.md for
18
+ // details on server mode alternatives.
19
+ // ============================================================================
20
+
21
+ "permission": {
22
+ // Allow reading from global OpenCode assets (skills, commands, agents, scripts)
23
+ // Also allow specs/** and vision/** to prevent sandbox false-positives
24
+ // in parallel-feature clones where OpenCode may misidentify project root
25
+ "external_directory": {
26
+ "~/.config/opencode/skills/**": "allow",
27
+ "~/.config/opencode/commands/**": "allow",
28
+ "~/.config/opencode/agents/**": "allow",
29
+ "~/.config/opencode/scripts/**": "allow",
30
+ "specs/**": "allow",
31
+ "vision/**": "allow"
32
+ },
33
+
34
+ "read": "allow",
35
+ "glob": "allow",
36
+ "grep": "allow", // Needed for codebase exploration
37
+ "list": "allow", // Directory listing tool
38
+ "edit": "allow", // Trust git as safety net
39
+
40
+ // Allow subagent invocation for autonomous workflows
41
+ // CRITICAL: Without this, /autocode-next-step will hang in CLI mode
42
+ "task": "allow",
43
+
44
+ // Allow skill loading (for complex multi-skill workflows)
45
+ "skill": "allow",
46
+
47
+ // Allow web fetching for documentation lookups (optional, set to "ask" if concerned)
48
+ "webfetch": "allow",
49
+
50
+ "bash": {
51
+ // Catch-all: ask for anything not explicitly allowed below
52
+ // This ensures unknown commands still prompt rather than fail silently
53
+ "*": "ask",
54
+
55
+ // ========================================================================
56
+ // TASK RUNNERS
57
+ // ========================================================================
58
+ "task": "allow",
59
+ "task *": "allow",
60
+ "make": "allow",
61
+ "make *": "allow",
62
+
63
+ // ========================================================================
64
+ // PYTHON / UV
65
+ // ========================================================================
66
+ "uv": "allow",
67
+ "uv *": "allow",
68
+ "uv sync": "allow",
69
+ "uv venv": "allow",
70
+ "uv run *": "allow",
71
+ "uv pip *": "allow",
72
+ "uv add *": "allow",
73
+ "uv remove *": "allow",
74
+ "uv lock *": "allow",
75
+
76
+ // Direct test/lint invocation (used by /techdebt and verification)
77
+ "uv run pytest": "allow",
78
+ "uv run pytest *": "allow",
79
+ "uv run ruff *": "allow",
80
+ "uv run mypy *": "allow",
81
+ "uv run black *": "allow",
82
+
83
+ // Direct invocation without uv (for projects not using uv)
84
+ "pytest": "allow",
85
+ "pytest *": "allow",
86
+ "ruff": "allow",
87
+ "ruff *": "allow",
88
+ "ruff check *": "allow",
89
+ "mypy": "allow",
90
+ "mypy *": "allow",
91
+ "black *": "allow",
92
+ "isort *": "allow",
93
+
94
+ // ========================================================================
95
+ // NODE / NPM / BUN
96
+ // ========================================================================
97
+ "npm install": "allow",
98
+ "npm ci": "allow",
99
+ "npm run dev": "allow",
100
+ "npm run build": "allow",
101
+ "npm run lint": "allow",
102
+ "npm run test": "allow",
103
+ "npm run test *": "allow",
104
+ "npm run start": "allow",
105
+ "npm run format": "allow",
106
+ "npm run typecheck": "allow",
107
+ "npm run typecheck *": "allow",
108
+
109
+ // ESLint direct invocation (used by /techdebt)
110
+ "npx eslint": "allow",
111
+ "npx eslint *": "allow",
112
+ "npm outdated": "allow",
113
+ "npm ls *": "allow",
114
+ "npm audit": "allow",
115
+ "npm audit *": "allow",
116
+
117
+ "bun install": "allow",
118
+ "bun run *": "allow",
119
+ "bun test": "allow",
120
+ "bun test *": "allow",
121
+ "bun add *": "allow",
122
+ "bun remove *": "allow",
123
+
124
+ // ========================================================================
125
+ // GIT - Full workflow (autonomous commits/push)
126
+ // ========================================================================
127
+ "git add *": "allow",
128
+ "git commit *": "allow",
129
+ "git push": "allow",
130
+ "git push *": "allow",
131
+ "git checkout *": "allow",
132
+ "git switch *": "allow",
133
+ "git branch": "allow",
134
+ "git branch *": "allow",
135
+ "git stash *": "allow",
136
+ "git pull": "allow",
137
+ "git pull *": "allow",
138
+ "git fetch *": "allow",
139
+ "git merge *": "allow",
140
+ "git rebase *": "allow",
141
+ "git tag *": "allow",
142
+ "git cherry-pick *": "allow",
143
+
144
+ // Git diagnostics (used by /commit-push-pr and /autocode-next-step)
145
+ "git status": "allow",
146
+ "git status *": "allow",
147
+ "git diff": "allow",
148
+ "git diff *": "allow",
149
+ "git log *": "allow",
150
+ "git rev-parse *": "allow",
151
+ "git rev-list *": "allow",
152
+ "git remote *": "allow",
153
+ "git show *": "allow",
154
+ "git ls-remote *": "allow",
155
+
156
+ // EXPLICIT DENY: Force push (destructive, stays as ask)
157
+ "git push --force": "ask",
158
+ "git push --force *": "ask",
159
+ "git push -f": "ask",
160
+ "git push -f *": "ask",
161
+
162
+ // ========================================================================
163
+ // GITHUB CLI - PR workflow (no merge)
164
+ // ========================================================================
165
+ "gh auth status": "allow",
166
+ "gh pr create *": "allow",
167
+ "gh pr view *": "allow",
168
+ "gh pr list *": "allow",
169
+ "gh pr checkout *": "allow",
170
+ "gh pr diff *": "allow",
171
+ "gh pr status": "allow",
172
+ "gh pr ready *": "allow",
173
+ "gh pr comment *": "allow",
174
+ "gh issue *": "allow",
175
+ "gh repo view *": "allow",
176
+ "gh repo clone *": "allow",
177
+
178
+ // EXPLICIT DENY: Merge and dangerous API calls (stay as ask)
179
+ // These inherit "ask" from global "*": "ask", but listed for clarity
180
+ // "gh pr merge *": "ask"
181
+ // "gh api *": "ask"
182
+
183
+ // ========================================================================
184
+ // DOCKER (common safe commands)
185
+ // ========================================================================
186
+ "docker build *": "allow",
187
+ "docker run *": "allow",
188
+ "docker ps": "allow",
189
+ "docker ps *": "allow",
190
+ "docker images": "allow",
191
+ "docker images *": "allow",
192
+ "docker logs *": "allow",
193
+ "docker exec *": "allow",
194
+ "docker stop *": "allow",
195
+ "docker start *": "allow",
196
+ "docker restart *": "allow",
197
+ "docker rm *": "allow",
198
+ "docker rmi *": "allow",
199
+ "docker compose *": "allow",
200
+ "docker-compose *": "allow",
201
+
202
+ // ========================================================================
203
+ // PYTHON (JSON validation, scripting)
204
+ // ========================================================================
205
+ "python3": "allow",
206
+ "python3 *": "allow",
207
+ "python": "allow",
208
+ "python *": "allow",
209
+
210
+ // ========================================================================
211
+ // FILE OPERATIONS (safe, commonly needed during development)
212
+ // ========================================================================
213
+ "mv *": "allow",
214
+ "mkdir *": "allow",
215
+ "mkdir -p *": "allow",
216
+ "cp *": "allow",
217
+ "cp -r *": "allow",
218
+ "rm *": "allow",
219
+ "rm -r *": "allow",
220
+ "rm -rf *": "allow",
221
+ "touch *": "allow",
222
+
223
+ // ========================================================================
224
+ // FILE/DIR CHECKS (used by scripts and agents)
225
+ // ========================================================================
226
+ "test *": "allow",
227
+ "test -f *": "allow",
228
+ "test -d *": "allow",
229
+ "test -e *": "allow",
230
+ "[ *": "allow",
231
+
232
+ // ========================================================================
233
+ // DIAGNOSTICS (inherited from global, but explicit for clarity)
234
+ // ========================================================================
235
+ "ls": "allow",
236
+ "ls *": "allow",
237
+ "cat *": "allow",
238
+ "head *": "allow",
239
+ "tail *": "allow",
240
+ "which *": "allow",
241
+ "pwd": "allow",
242
+ "echo *": "allow",
243
+ "tr *": "allow",
244
+ "wc *": "allow",
245
+ "true": "allow",
246
+ "false": "allow",
247
+ "grep *": "allow",
248
+ "find *": "allow",
249
+ "tree *": "allow",
250
+ "stat *": "allow",
251
+ "file *": "allow",
252
+ "basename *": "allow",
253
+ "dirname *": "allow",
254
+ "realpath *": "allow",
255
+
256
+ // ========================================================================
257
+ // RUST / CARGO (if applicable)
258
+ // ========================================================================
259
+ "cargo": "allow",
260
+ "cargo *": "allow",
261
+ "cargo build": "allow",
262
+ "cargo build *": "allow",
263
+ "cargo test": "allow",
264
+ "cargo test *": "allow",
265
+ "cargo clippy": "allow",
266
+ "cargo clippy *": "allow",
267
+ "cargo fmt": "allow",
268
+ "cargo fmt *": "allow",
269
+ "cargo check": "allow",
270
+ "cargo check *": "allow",
271
+ "cargo run": "allow",
272
+ "cargo run *": "allow",
273
+
274
+ // ========================================================================
275
+ // UTILITIES (timestamps for specs)
276
+ // ========================================================================
277
+ "date": "allow",
278
+ "date *": "allow"
279
+ }
280
+ },
281
+
282
+ "instructions": ["AGENTS.md"]
283
+ }
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: sql_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
progress.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-28T18:00:24+0100] === Ralph Loop Start ===
2
+ [2026-03-28T18:00:24+0100] Spec: specs/F007-IMPLEMENTATION_SPEC.md
3
+ [2026-03-28T18:00:24+0100] Model: openai/gpt-5.3-codex
4
+ [2026-03-28T18:00:24+0100] Max iterations: 20
5
+ [2026-03-28T18:04:33+0100] Iteration 1/20 | Step: 1.1 | action=continue
6
+ [2026-03-28T18:08:10+0100] Iteration 2/20 | Step: 1.3 | action=continue
7
+ [2026-03-28T18:10:48+0100] Iteration 3/20 | Step: 1.3 | action=continue
8
+ [2026-03-28T18:14:57+0100] Iteration 4/20 | Step: 2.1 | action=continue
9
+ [2026-03-28T18:17:25+0100] Iteration 5/20 | Step: 2.2 | action=continue
10
+ [2026-03-28T18:17:25+0100] === Ralph Loop Aborted === reason=Finalization stuck after 5 iterations
11
+ [2026-03-28T21:04:43+0100] === Ralph Loop Start ===
12
+ [2026-03-28T21:04:43+0100] Spec: specs/F007-IMPLEMENTATION_SPEC.md
13
+ [2026-03-28T21:04:43+0100] Model: openai/gpt-5.3-codex
14
+ [2026-03-28T21:04:43+0100] Max iterations: 20
15
+ [2026-03-28T21:09:06+0100] Iteration 1/20 | Step: 3.1 | action=continue
16
+ [2026-03-28T21:40:17+0100] Iteration 2/20 | Step: unknown | action=blocked | reason=External deployment verification is blocked by GHCR access/auth failure (403 pulling base image), so verifier gate cannot approve final completion yet.
17
+ [2026-03-28T21:44:42+0100] Iteration 3/20 | Step: unknown | action=blocked | reason=External credential/access dependency remains: need authenticated GHCR pull and HF push evidence (build+push attempt) to satisfy final verifier approval.
18
+ [2026-03-28T22:05:11+0100] Iteration 4/20 | Step: unknown | action=blocked | reason=Awaiting user-side authenticated deployment evidence: successful GHCR-authenticated `uv run openenv build -t openenv-sql-env-f007-hf-submission` and `uv run openenv push` output before verifier/final completion can proceed.
19
+ [2026-03-28T22:49:48+0100] Iteration 5/20 | Step: unknown | action=blocked | reason=Awaiting user-provided authenticated external deployment evidence (GHCR-authenticated `openenv build` success and `openenv push` output) to satisfy final verifier gate for F007.
20
+ [2026-03-28T22:50:20+0100] === Ralph Loop Start ===
21
+ [2026-03-28T22:50:20+0100] Spec: specs/F007-IMPLEMENTATION_SPEC.md
22
+ [2026-03-28T22:50:20+0100] Model: openai/gpt-5.3-codex
23
+ [2026-03-28T22:50:20+0100] Max iterations: 20
24
+ [2026-03-28T22:54:21+0100] Iteration 1/20 | Step: unknown | action=blocked | reason=Missing external authenticated deployment evidence (GHCR-authenticated build and Hugging Face push output) required by F007 final verification gate.
25
+ [2026-03-28T23:00:44+0100] Iteration 2/20 | Step: unknown | action=blocked | reason=Authenticated deployment attempts now run, but `openenv build` fails with local Docker disk exhaustion (`No space left on device`) and `openenv push` fails with HF namespace permission (`403 Forbidden` for `hjerpe/sql_env`) plus README frontmatter metadata validation (`colorFrom`/`colorTo`), so final verification gate cannot pass without external intervention.
26
+ [2026-03-28T23:14:35+0100] === Ralph Loop Start ===
27
+ [2026-03-28T23:14:35+0100] Spec: specs/F007-IMPLEMENTATION_SPEC.md
28
+ [2026-03-28T23:14:35+0100] Model: openai/gpt-5.3-codex
29
+ [2026-03-28T23:14:35+0100] Max iterations: 20
pyproject.toml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sql-env"
7
+ version = "0.1.0"
8
+ description = "Interactive SQL exploration RL environment for the OpenEnv Challenge"
9
+ requires-python = ">=3.11,<3.13"
10
+ dependencies = [
11
+ # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
12
+ "openenv-core[core]>=0.2.1",
13
+ # Environment-specific dependencies
14
+ "pydantic>=2.0.0",
15
+ "fastapi>=0.104.0",
16
+ "uvicorn>=0.24.0",
17
+ "torch==2.2.2",
18
+ "transformers<5",
19
+ "numpy<2",
20
+ "requests>=2.31.0",
21
+ "sqlalchemy>=2.0.47",
22
+ "jupyter>=1.1.1",
23
+ "notebook>=7.5.5",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ "pytest>=8.0.0",
29
+ "pytest-cov>=4.0.0",
30
+ "ruff>=0.4.0",
31
+ ]
32
+ training = [
33
+ "trl>=0.14.0,<0.15.0",
34
+ "accelerate>=0.34.0",
35
+ "matplotlib>=3.7.0",
36
+ ]
37
+
38
+ [project.scripts]
39
+ # Server entry point — enables: uv run server
40
+ server = "sql_env.server.app:main"
41
+
42
+ [tool.setuptools]
43
+ include-package-data = true
44
+ packages = [
45
+ "sql_env",
46
+ "sql_env.server",
47
+ "sql_env.data",
48
+ "sql_env.data.databases",
49
+ ]
50
+ package-dir = { "sql_env" = ".", "sql_env.server" = "server", "sql_env.data" = "data", "sql_env.data.databases" = "data/databases" }
51
+
52
+ [tool.ruff]
53
+ line-length = 88
54
+ exclude = ["scripts/"]
55
+
56
+ [tool.ruff.lint]
57
+ select = ["E", "F", "W"]
58
+
59
+ [tool.ruff.lint.per-file-ignores]
60
+ # SQL schema strings and LLM prompts are intentionally long
61
+ "server/sql_environment.py" = ["E501"]
62
+
63
+ [tool.pytest.ini_options]
64
+ testpaths = ["tests"]
65
+ pythonpath = ["."]
66
+ addopts = "--import-mode=importlib"
67
+ markers = [
68
+ "slow: integration or long-running tests",
69
+ ]
scripts/curate_questions.py ADDED
@@ -0,0 +1,921 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Curate multi-database Spider questions for SQLEnv."""
2
+
3
+ from __future__ import annotations
4
+
5
import argparse
import io
import json
import logging
import re
import sqlite3
import time
import zipfile
from collections.abc import Iterable
from contextlib import closing
from pathlib import Path
from typing import Any, Callable
from urllib.parse import quote

import requests
19
+
20
+
21
+ SPIDER_SQLITE_URLS = (
22
+ "https://raw.githubusercontent.com/taoyds/spider/master/database/{db_id}/{db_id}.sqlite",
23
+ "https://github.com/taoyds/spider/raw/master/database/{db_id}/{db_id}.sqlite",
24
+ )
25
+ SPIDER_DATASET_FILE_ID = "1403EGqzIDoHMdQF4c9Bkyl7dZLZ5Wt6J"
26
+ SPIDER_DATASET_DOWNLOAD_URL = "https://drive.usercontent.google.com/download"
27
+
28
+ SQLITE_MAGIC_HEADER = b"SQLite format 3\x00"
29
+ DB_ID_PATTERN = re.compile(r"^[A-Za-z0-9_]+$")
30
+ TABLE_TOKEN_PATTERN = re.compile(
31
+ r"\b(?:FROM|JOIN)\s+([`\"\[]?[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?[`\"\]]?)",
32
+ flags=re.IGNORECASE,
33
+ )
34
+ CTE_ALIAS_PATTERN = re.compile(
35
+ r"(?:\bWITH\b|,)\s*([A-Za-z_][A-Za-z0-9_]*)\s+AS\s*\(",
36
+ flags=re.IGNORECASE,
37
+ )
38
+
39
+ TRAIN_SPLIT = "train"
40
+ EVAL_SPLIT = "eval"
41
+ VALID_SPLITS = {TRAIN_SPLIT, EVAL_SPLIT}
42
+ VALID_ANSWER_TYPES = {"integer", "float", "string", "list", "table"}
43
+ VALID_DIFFICULTIES = {"easy", "medium", "hard"}
44
+ REQUIRED_FIELDS = (
45
+ "question_id",
46
+ "question_text",
47
+ "database_name",
48
+ "gold_sql",
49
+ "gold_answer",
50
+ "answer_type",
51
+ "difficulty",
52
+ "tables_involved",
53
+ "split",
54
+ )
55
+
56
+ LOGGER = logging.getLogger(__name__)
57
+ _SPIDER_ARCHIVE_BYTES: bytes | None = None
58
+
59
+
60
+ def _normalize_table_name(raw_table: str) -> str:
61
+ """Normalize a table token extracted from SQL text."""
62
+ token = raw_table.strip().strip('`"[]')
63
+ if "." in token:
64
+ token = token.split(".", maxsplit=1)[1]
65
+ return token
66
+
67
+
68
def _validate_db_id(db_id: str) -> None:
    """Reject identifiers that would be unsafe as filesystem path components."""
    if DB_ID_PATTERN.fullmatch(db_id) is None:
        raise ValueError(f"Invalid db_id '{db_id}'. Expected [A-Za-z0-9_]+")
72
+
73
+
74
def _is_valid_sqlite_file(path: Path) -> bool:
    """Check that the file exists and starts with the SQLite magic header."""
    header_size = len(SQLITE_MAGIC_HEADER)
    if not path.exists() or path.stat().st_size < header_size:
        return False
    with path.open("rb") as stream:
        return stream.read(header_size) == SQLITE_MAGIC_HEADER
80
+
81
+
82
def _download_sqlite_file(db_id: str, destination: Path) -> None:
    """Download one Spider SQLite file into destination.

    Tries each raw-GitHub URL template (with one retry apiece), then falls
    back to downloading the full Spider dataset zip and extracting the one
    database from it.

    Args:
        db_id: Spider database identifier.
        destination: Path to write ``{db_id}.sqlite``.

    Raises:
        FileNotFoundError: If all sources fail for this ``db_id``.
    """
    _validate_db_id(db_id)
    destination.parent.mkdir(parents=True, exist_ok=True)

    last_error: str | None = None
    for url_template in SPIDER_SQLITE_URLS:
        url = url_template.format(db_id=db_id)
        for attempt in range(2):  # one retry per URL
            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                # Stage through a temp file so a partial or invalid download
                # never leaves a corrupt file at the final destination.
                tmp_path = destination.with_suffix(".sqlite.tmp")
                tmp_path.write_bytes(response.content)
                if not _is_valid_sqlite_file(tmp_path):
                    tmp_path.unlink(missing_ok=True)
                    raise FileNotFoundError(
                        f"Downloaded payload for '{db_id}' was not a valid SQLite file"
                    )
                tmp_path.replace(destination)
                return
            except (requests.RequestException, OSError, FileNotFoundError) as exc:
                last_error = str(exc)
                if attempt == 0:
                    time.sleep(5)

    # Fallback: pull the whole Spider archive and extract just this database.
    try:
        archive_bytes = _download_spider_archive()
        _extract_sqlite_from_archive(
            archive_bytes=archive_bytes,
            db_id=db_id,
            destination=destination,
        )
        return
    except (
        requests.RequestException,
        OSError,
        FileNotFoundError,
        zipfile.BadZipFile,
    ) as exc:
        last_error = str(exc)

    raise FileNotFoundError(
        f"Unable to download Spider SQLite for '{db_id}'. Last error: {last_error}"
    )
135
+
136
+
137
def _download_spider_archive() -> bytes:
    """Download and cache official Spider dataset archive bytes.

    The archive lives on Google Drive, which serves an HTML "virus scan"
    interstitial for large files; when that page is returned, the hidden
    form fields (``confirm``/``uuid``) are scraped and a second request is
    issued against the direct-download endpoint.

    Returns:
        Raw bytes of the Spider zip archive (cached at module level).

    Raises:
        FileNotFoundError: If both attempts fail or the payload is not a zip.
    """
    # Module-level cache: the archive is large, download it at most once
    # per process.
    global _SPIDER_ARCHIVE_BYTES
    if _SPIDER_ARCHIVE_BYTES is not None:
        return _SPIDER_ARCHIVE_BYTES

    last_error: str | None = None
    for attempt in range(2):  # one retry for transient network failures
        try:
            session = requests.Session()
            warning_page = session.get(
                f"https://drive.google.com/uc?export=download&id={SPIDER_DATASET_FILE_ID}",
                timeout=60,
            )
            warning_page.raise_for_status()

            payload = warning_page.content
            content_type = warning_page.headers.get("content-type", "")
            # An HTML response means we hit the interstitial, not the file.
            if "text/html" in content_type.lower():
                page_text = warning_page.text
                params: dict[str, str] = {
                    "id": SPIDER_DATASET_FILE_ID,
                    "export": "download",
                }
                # Carry over the hidden confirmation tokens when present.
                for field in ("confirm", "uuid"):
                    match = re.search(
                        rf'name="{field}" value="([^"]+)"',
                        page_text,
                    )
                    if match:
                        params[field] = match.group(1)

                download_response = session.get(
                    SPIDER_DATASET_DOWNLOAD_URL,
                    params=params,
                    timeout=240,
                )
                download_response.raise_for_status()
                payload = download_response.content

            # Zip files start with the "PK" magic bytes; anything else is
            # an error page or truncated download.
            if not payload.startswith(b"PK"):
                raise FileNotFoundError(
                    "Spider dataset download did not return a zip file"
                )

            _SPIDER_ARCHIVE_BYTES = payload
            return _SPIDER_ARCHIVE_BYTES
        except (requests.RequestException, FileNotFoundError) as exc:
            last_error = str(exc)
            if attempt == 0:
                time.sleep(5)

    raise FileNotFoundError(
        f"Unable to download Spider dataset zip. Last error: {last_error}"
    )
192
+
193
+
194
def _extract_sqlite_from_archive(
    archive_bytes: bytes, db_id: str, destination: Path
) -> None:
    """Extract one SQLite file from the Spider zip archive."""
    # Archive root directory has varied across Spider releases.
    member_candidates = (
        f"spider_data/database/{db_id}/{db_id}.sqlite",
        f"spider/database/{db_id}/{db_id}.sqlite",
        f"spider-master/database/{db_id}/{db_id}.sqlite",
    )

    payload: bytes | None = None
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        available = set(archive.namelist())
        for member in member_candidates:
            if member in available:
                payload = archive.read(member)
                break

    if payload is None:
        raise FileNotFoundError(f"Database '{db_id}' not found in Spider archive")

    # Stage through a temp file so an invalid payload never clobbers an
    # existing good database at the destination.
    tmp_path = destination.with_suffix(".sqlite.tmp")
    tmp_path.write_bytes(payload)
    if not _is_valid_sqlite_file(tmp_path):
        tmp_path.unlink(missing_ok=True)
        raise FileNotFoundError(
            f"Archive payload for '{db_id}' was not a valid SQLite file"
        )
    tmp_path.replace(destination)
224
+
225
+
226
def download_spider_databases(db_ids: list[str], output_dir: Path) -> dict[str, Path]:
    """Download Spider SQLite database files for selected ``db_ids``.

    Existing valid files are reused and not downloaded again. Databases
    that fail to download are logged and skipped rather than aborting the
    whole batch.

    Args:
        db_ids: Spider database IDs.
        output_dir: Base output directory (e.g. ``data/databases``).

    Returns:
        Mapping of ``db_id`` to local SQLite path (only successful ones).

    Raises:
        ValueError: If a resolved path escapes ``output_dir``.
        FileNotFoundError: If no requested database can be prepared.
    """
    db_paths: dict[str, Path] = {}
    output_root = output_dir.resolve()

    for db_id in db_ids:
        _validate_db_id(db_id)
        sqlite_path = output_dir / db_id / f"{db_id}.sqlite"
        # Defense-in-depth: even with the db_id pattern check, verify the
        # resolved target stays under the output root (no symlink/.. escape).
        resolved_path = sqlite_path.resolve()
        if output_root not in resolved_path.parents:
            raise ValueError(
                "Resolved path "
                f"'{resolved_path}' escapes output directory '{output_root}'"
            )

        # Reuse an already-downloaded, valid database file.
        if _is_valid_sqlite_file(sqlite_path):
            db_paths[db_id] = sqlite_path
            continue

        try:
            _download_sqlite_file(db_id=db_id, destination=sqlite_path)
        except FileNotFoundError as exc:
            # Best-effort batch: skip this database but keep the others.
            LOGGER.warning("Skipping database '%s': %s", db_id, exc)
            continue
        db_paths[db_id] = sqlite_path

    if not db_paths:
        raise FileNotFoundError("No Spider SQLite databases could be prepared")

    return db_paths
269
+
270
+
271
def _load_questions_from_hf_datasets(db_ids: set[str]) -> list[dict[str, Any]]:
    """Load questions through the `datasets` package when available."""
    try:
        from datasets import load_dataset
    except ImportError as exc:
        # Normalized to ConnectionError so the caller can treat every
        # loader failure uniformly.
        raise ConnectionError("`datasets` package is not installed") from exc

    records: list[dict[str, Any]] = []
    for spider_split in ("train", "validation"):
        dataset = load_dataset("xlangai/spider", split=spider_split)
        for row in dataset:
            database_id = row.get("db_id")
            if database_id in db_ids:
                records.append(
                    {
                        "db_id": database_id,
                        "query": row.get("query", ""),
                        "question": row.get("question", ""),
                        "spider_split": spider_split,
                    }
                )
    return records
293
+
294
+
295
def _load_questions_from_spider_archive(db_ids: set[str]) -> list[dict[str, Any]]:
    """Load Spider questions from the official dataset zip archive.

    Args:
        db_ids: Database IDs to keep; all other questions are filtered out.

    Returns:
        Question records tagged with their original ``spider_split``.

    Raises:
        ConnectionError: If no questions matched the requested db_ids.
    """
    archive_bytes = _download_spider_archive()
    records: list[dict[str, Any]] = []

    # (archive member, split label) pairs; dev.json maps to "validation"
    # to match the HuggingFace split naming used by the other loaders.
    split_files = (
        ("spider_data/train_spider.json", "train"),
        ("spider_data/dev.json", "validation"),
    )

    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        for member_name, spider_split in split_files:
            try:
                payload = archive.read(member_name)
            except KeyError:
                # Member layout differs between releases; skip missing files.
                continue

            rows = json.loads(payload.decode("utf-8"))
            if not isinstance(rows, list):
                continue

            for row in rows:
                if not isinstance(row, dict):
                    continue
                db_id = row.get("db_id")
                if db_id not in db_ids:
                    continue
                records.append(
                    {
                        "db_id": db_id,
                        "query": row.get("query", ""),
                        "question": row.get("question", ""),
                        "spider_split": spider_split,
                    }
                )

    # An empty result means the archive layout or filter is wrong; raise so
    # the caller can fall through to the next loading strategy.
    if not records:
        raise ConnectionError(
            "No Spider questions found in archive for selected db_ids"
        )

    return records
337
+
338
+
339
def _load_questions_from_hf_rows_api(db_ids: set[str]) -> list[dict[str, Any]]:
    """Load Spider questions from the HuggingFace datasets rows API."""
    endpoint = "https://datasets-server.huggingface.co/rows"
    page_size = 100
    records: list[dict[str, Any]] = []

    for spider_split in ("train", "validation"):
        offset = 0
        # Page through the split until the server returns an empty page.
        while True:
            response = requests.get(
                endpoint,
                params={
                    "dataset": "xlangai/spider",
                    "config": "spider",
                    "split": spider_split,
                    "offset": offset,
                    "length": page_size,
                },
                timeout=30,
            )
            response.raise_for_status()
            rows = response.json().get("rows", [])
            if not rows:
                break

            for wrapper in rows:
                row = wrapper.get("row", {})
                database_id = row.get("db_id")
                if database_id in db_ids:
                    records.append(
                        {
                            "db_id": database_id,
                            "query": row.get("query", ""),
                            "question": row.get("question", ""),
                            "spider_split": spider_split,
                        }
                    )
            offset += len(rows)

    return records
378
+
379
+
380
def load_spider_questions(db_ids: list[str]) -> list[dict[str, Any]]:
    """Load raw Spider questions for selected databases.

    Tries three strategies in order — the official zip archive, the
    `datasets` package, then the HuggingFace rows API — each with one
    retry, returning the first successful result.

    Args:
        db_ids: Spider database IDs.

    Returns:
        Filtered list of question records including ``spider_split`` metadata.

    Raises:
        ConnectionError: If all loading strategies fail.
        ValueError: If any db_id fails validation.
    """
    if not db_ids:
        return []

    db_set = set(db_ids)
    for db_id in db_set:
        _validate_db_id(db_id)

    # Ordered by preference: archive is most complete, rows API is the
    # dependency-free last resort.
    loaders: tuple[Callable[[set[str]], list[dict[str, Any]]], ...] = (
        _load_questions_from_spider_archive,
        _load_questions_from_hf_datasets,
        _load_questions_from_hf_rows_api,
    )

    last_error: str | None = None
    for loader in loaders:
        for attempt in range(2):  # one retry per strategy
            try:
                return loader(db_set)
            except (ConnectionError, OSError, requests.RequestException) as exc:
                last_error = f"{loader.__name__}: {exc}"
                if attempt == 0:
                    time.sleep(5)

    raise ConnectionError(
        f"Unable to load Spider questions from HuggingFace. Last error: {last_error}"
    )
418
+
419
+
420
+ def _shape_rows(rows: list[tuple[Any, ...]]) -> Any:
421
+ """Shape SQL rows into scalar/list/table forms used by the dataset."""
422
+ if not rows:
423
+ return []
424
+
425
+ column_count = len(rows[0])
426
+ if column_count == 1:
427
+ values = [row[0] for row in rows]
428
+ if len(values) == 1:
429
+ return values[0]
430
+ return values
431
+
432
+ return [list(row) for row in rows]
433
+
434
+
435
def compute_gold_answer(gold_sql: str, db_path: Path) -> Any:
    """Execute gold SQL against SQLite and return a normalized result.

    Args:
        gold_sql: SQL query to run (read-only).
        db_path: Path to an on-disk SQLite database.

    Returns:
        Result shaped by ``_shape_rows`` (scalar, list, or table).

    Raises:
        FileNotFoundError: If ``db_path`` does not exist.
        sqlite3.Error: If the file is not a valid SQLite database or the
            query fails.
    """
    if not db_path.exists():
        raise FileNotFoundError(f"Database not found: {db_path}")
    if not _is_valid_sqlite_file(db_path):
        raise sqlite3.Error(f"Invalid SQLite database file: {db_path}")

    # URI with mode=ro opens the database read-only so gold queries can
    # never mutate the fixture data.
    db_uri = f"file:{quote(str(db_path.resolve()))}?mode=ro"
    # BUG FIX: sqlite3.Connection as a context manager only manages the
    # transaction — it does NOT close the connection, leaking a file handle
    # per call. closing() guarantees the connection is released, even when
    # the query raises.
    with closing(sqlite3.connect(db_uri, uri=True)) as conn:
        cursor = conn.execute(gold_sql)
        rows = cursor.fetchall()
    return _shape_rows(rows)
447
+
448
+
449
def classify_answer_type(gold_answer: Any) -> str:
    """Classify the answer type for a computed gold answer."""
    # bool is a subclass of int and deliberately lands in the integer bucket.
    if isinstance(gold_answer, (bool, int)):
        return "integer"
    if isinstance(gold_answer, float):
        return "float"
    if isinstance(gold_answer, str):
        return "string"

    if isinstance(gold_answer, tuple):
        # A 1-tuple is treated as its single element; any other width is a row.
        if len(gold_answer) == 1:
            return classify_answer_type(gold_answer[0])
        return "table"

    if isinstance(gold_answer, list):
        # Nested sequences mean rows ("table"); otherwise a flat value list.
        if gold_answer and isinstance(gold_answer[0], (list, tuple)):
            return "table"
        return "list"

    if gold_answer is None:
        # NULL-only results are exposed downstream as (empty) lists.
        return "list"

    raise ValueError(f"Unsupported gold_answer type: {type(gold_answer).__name__}")
477
+
478
+
479
def extract_tables_involved(gold_sql: str) -> list[str]:
    """Extract table names referenced after FROM/JOIN tokens.

    CTE aliases (matched by ``CTE_ALIAS_PATTERN``) are excluded so only
    real tables are reported.  Names are normalized and returned sorted.
    """
    if not gold_sql.strip():
        return []

    cte_aliases: set[str] = set()
    for cte_match in CTE_ALIAS_PATTERN.finditer(gold_sql):
        cte_aliases.add(cte_match.group(1).lower())

    found: set[str] = set()
    for token_match in TABLE_TOKEN_PATTERN.finditer(gold_sql):
        name = _normalize_table_name(token_match.group(1))
        if name and name.lower() not in cte_aliases:
            found.add(name)
    return sorted(found)
494
+
495
+
496
def classify_difficulty(tables_involved: Iterable[str]) -> str:
    """Assign difficulty from the number of distinct tables involved."""
    # Empty names are ignored; duplicates collapse via the set.
    distinct_tables = {name for name in tables_involved if name}
    count = len(distinct_tables)
    if count <= 2:
        return "easy"
    return "medium" if count == 3 else "hard"
504
+
505
+
506
+ def _load_db_list(db_list_path: Path) -> list[str]:
507
+ """Load database IDs from a JSON array file."""
508
+ payload = json.loads(db_list_path.read_text(encoding="utf-8"))
509
+ if not isinstance(payload, list) or not all(
510
+ isinstance(item, str) for item in payload
511
+ ):
512
+ raise ValueError(f"Expected JSON list[str] in {db_list_path}")
513
+ return payload
514
+
515
+
516
def assign_splits(questions: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Assign SQLEnv train/eval splits from Spider split metadata.

    Records whose ``spider_split`` is "validation" (or already the eval
    split name) map to the eval split, "train" maps to train, and unknown
    values default to train with a warning.  The eval split is then
    rebalanced toward roughly 30% of the total by moving records between
    splits in list order.

    Args:
        questions: Question records carrying ``spider_split`` metadata.

    Returns:
        Shallow copies of the input records with a ``split`` field added.
    """
    split_questions: list[dict[str, Any]] = []
    for question in questions:
        spider_split = str(question.get("spider_split", "")).lower()
        if spider_split in {"validation", EVAL_SPLIT}:
            split = EVAL_SPLIT
        elif spider_split in {"train", TRAIN_SPLIT}:
            split = TRAIN_SPLIT
        else:
            LOGGER.warning(
                "Unknown spider_split '%s' for database '%s'; defaulting to train",
                spider_split,
                question.get("database_name", "unknown"),
            )
            split = TRAIN_SPLIT
        # Copy so the caller's records are never mutated.
        updated = dict(question)
        updated["split"] = split
        split_questions.append(updated)

    # With zero or one record there is nothing to rebalance.
    total = len(split_questions)
    if total <= 1:
        return split_questions

    train_records = [q for q in split_questions if q["split"] == TRAIN_SPLIT]
    eval_records = [q for q in split_questions if q["split"] == EVAL_SPLIT]
    # Rebalancing only makes sense when both splits are non-empty.
    if not train_records or not eval_records:
        return split_questions

    # Target roughly a 70/30 train/eval ratio, with at least one eval record.
    target_eval_count = max(1, round(total * 0.3))
    current_eval_count = len(eval_records)

    if current_eval_count >= target_eval_count:
        if current_eval_count == target_eval_count:
            return split_questions

        # Too many eval records: demote the leading excess to train.  These
        # dicts are the same objects held by split_questions, so mutating
        # them updates the returned list in place.
        excess = min(current_eval_count - target_eval_count, len(eval_records))
        for index in range(excess):
            eval_records[index]["split"] = TRAIN_SPLIT
        return split_questions

    # Too few eval records: promote leading train records to eval.
    needed = min(target_eval_count - current_eval_count, len(train_records))
    for index in range(needed):
        train_records[index]["split"] = EVAL_SPLIT

    return split_questions
562
+
563
+
564
+ def _sort_enriched_questions(
565
+ questions: list[dict[str, Any]],
566
+ ) -> list[dict[str, Any]]:
567
+ """Return deterministically ordered records for stable output files."""
568
+ return sorted(
569
+ questions,
570
+ key=lambda item: (
571
+ str(item.get("database_name", "")),
572
+ str(item.get("spider_split", "")),
573
+ str(item.get("gold_sql", "")),
574
+ str(item.get("question_text", "")),
575
+ ),
576
+ )
577
+
578
+
579
+ def _assign_question_ids(questions: list[dict[str, Any]]) -> list[dict[str, Any]]:
580
+ """Assign IDs with format ``{db_id}_{split}_{index:03d}`` per db/split."""
581
+ counters: dict[tuple[str, str], int] = {}
582
+ with_ids: list[dict[str, Any]] = []
583
+
584
+ for question in questions:
585
+ db_id = str(question["database_name"])
586
+ split = str(question["split"])
587
+ key = (db_id, split)
588
+ index = counters.get(key, 0)
589
+ counters[key] = index + 1
590
+
591
+ updated = dict(question)
592
+ updated["question_id"] = f"{db_id}_{split}_{index:03d}"
593
+ with_ids.append(updated)
594
+
595
+ return with_ids
596
+
597
+
598
+ def _write_output(path: Path, records: list[dict[str, Any]]) -> None:
599
+ """Write JSON records to disk."""
600
+ path.parent.mkdir(parents=True, exist_ok=True)
601
+ path.write_text(json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8")
602
+
603
+
604
+ def _load_output_questions(path: Path) -> list[dict[str, Any]]:
605
+ """Load curated output records from a JSON file."""
606
+ try:
607
+ payload = json.loads(path.read_text(encoding="utf-8"))
608
+ except FileNotFoundError as exc:
609
+ raise ValueError(f"Output dataset file not found: {path}") from exc
610
+ except json.JSONDecodeError as exc:
611
+ raise ValueError(f"Output dataset file is invalid JSON: {path}") from exc
612
+
613
+ if not isinstance(payload, list):
614
+ raise ValueError(f"Expected JSON list in {path}")
615
+ records: list[dict[str, Any]] = []
616
+ for index, item in enumerate(payload):
617
+ if not isinstance(item, dict):
618
+ raise ValueError(f"Expected record object at index {index} in {path}")
619
+ records.append(item)
620
+ return records
621
+
622
+
623
+ def _question_fingerprint(record: dict[str, Any]) -> tuple[str, str, str]:
624
+ """Build a stable identity tuple for split leakage checks."""
625
+ return (
626
+ str(record.get("database_name", "")),
627
+ str(record.get("question_text", "")),
628
+ str(record.get("gold_sql", "")),
629
+ )
630
+
631
+
632
def validate_dataset(
    questions: list[dict[str, Any]],
    db_paths: dict[str, Path],
) -> list[str]:
    """Validate curated records and return all detected issues.

    Checks required fields, non-empty values, valid answer types /
    difficulties / splits, unique question IDs, train/eval leakage, and —
    when a database path is available — that the stored ``gold_answer``
    matches a re-execution of ``gold_sql``.  Difficulty distribution is
    only warned about (via LOGGER), never reported as an error.

    Args:
        questions: Curated question records to validate.
        db_paths: Mapping of database name to its SQLite file path.

    Returns:
        A list of human-readable error strings; empty when valid.
    """
    errors: list[str] = []
    question_ids: set[str] = set()
    train_fingerprints: set[tuple[str, str, str]] = set()
    eval_fingerprints: set[tuple[str, str, str]] = set()
    difficulty_counts: dict[str, int] = {key: 0 for key in VALID_DIFFICULTIES}

    for index, question in enumerate(questions):
        context = f"record[{index}]"
        missing = [field for field in REQUIRED_FIELDS if field not in question]
        if missing:
            # Without all required fields the per-field checks below would
            # KeyError, so skip the rest of this record.
            errors.append(f"{context}: missing required fields: {', '.join(missing)}")
            continue

        question_id = str(question["question_id"]).strip()
        if not question_id:
            errors.append(f"{context}: question_id must be non-empty")
        elif question_id in question_ids:
            errors.append(f"{context}: duplicate question_id '{question_id}'")
        else:
            question_ids.add(question_id)

        question_text = str(question["question_text"]).strip()
        if not question_text:
            errors.append(f"{context}: question_text must be non-empty")

        db_id = str(question["database_name"]).strip()
        if not db_id:
            # Remaining checks need a database name; skip this record.
            errors.append(f"{context}: database_name must be non-empty")
            continue

        gold_sql = str(question["gold_sql"]).strip()
        if not gold_sql:
            errors.append(f"{context}: gold_sql must be non-empty")

        answer_type = str(question["answer_type"]).strip()
        if answer_type not in VALID_ANSWER_TYPES:
            errors.append(
                f"{context}: answer_type '{answer_type}' is invalid "
                f"(expected one of {sorted(VALID_ANSWER_TYPES)})"
            )

        difficulty = str(question["difficulty"]).strip()
        if difficulty not in VALID_DIFFICULTIES:
            errors.append(
                f"{context}: difficulty '{difficulty}' is invalid "
                f"(expected one of {sorted(VALID_DIFFICULTIES)})"
            )
        else:
            # Counted for the distribution warnings at the end.
            difficulty_counts[difficulty] += 1

        tables = question["tables_involved"]
        if not isinstance(tables, list) or not tables:
            errors.append(f"{context}: tables_involved must be a non-empty list")
        elif not all(
            isinstance(table_name, str) and table_name.strip() for table_name in tables
        ):
            errors.append(
                f"{context}: tables_involved must contain non-empty table name strings"
            )

        split = str(question["split"]).strip()
        if split not in VALID_SPLITS:
            errors.append(
                f"{context}: split '{split}' is invalid "
                f"(expected one of {sorted(VALID_SPLITS)})"
            )
        else:
            # Fingerprints collected per split power the leakage check below.
            fingerprint = _question_fingerprint(question)
            if split == TRAIN_SPLIT:
                train_fingerprints.add(fingerprint)
            else:
                eval_fingerprints.add(fingerprint)

        if gold_sql and db_id in db_paths:
            try:
                # Re-run the gold SQL to confirm the stored answer is current.
                recomputed = compute_gold_answer(
                    gold_sql=gold_sql, db_path=db_paths[db_id]
                )
                if recomputed != question["gold_answer"]:
                    errors.append(
                        f"{context}: gold_answer mismatch"
                        f" for question_id '{question_id}'"
                    )
            except (sqlite3.Error, FileNotFoundError) as exc:
                errors.append(
                    f"{context}: gold_sql execution failed"
                    f" for database '{db_id}': {exc}"
                )
        elif db_id not in db_paths:
            errors.append(
                f"{context}: missing database path"
                f" for '{db_id}' (expected in data/databases)"
            )

    # Any identical (db, text, sql) triple in both splits is leakage.
    leaked = sorted(train_fingerprints.intersection(eval_fingerprints))
    if leaked:
        errors.append(
            f"train/eval split leak detected:"
            f" {len(leaked)} question(s) appear in both splits"
        )

    # Difficulty targets (40/40/20) are soft — log warnings, not errors.
    total = len(questions)
    if total > 0:
        easy_ratio = difficulty_counts["easy"] / total
        medium_ratio = difficulty_counts["medium"] / total
        hard_ratio = difficulty_counts["hard"] / total
        if abs(easy_ratio - 0.40) > 0.20:
            LOGGER.warning(
                "Difficulty distribution off target: easy=%s (target 40%%)",
                f"{easy_ratio:.2%}",
            )
        if abs(medium_ratio - 0.40) > 0.20:
            LOGGER.warning(
                "Difficulty distribution off target: medium=%s (target 40%%)",
                f"{medium_ratio:.2%}",
            )
        if abs(hard_ratio - 0.20) > 0.15:
            LOGGER.warning(
                "Difficulty distribution off target: hard=%s (target 20%%)",
                f"{hard_ratio:.2%}",
            )

    return errors
760
+
761
+
762
def main() -> None:
    """CLI entry point for the dataset curation pipeline.

    In default mode: downloads the listed databases, loads the matching
    Spider questions, enriches each with gold answer / type / difficulty /
    tables, assigns splits and IDs, validates, and writes train/eval JSON
    files.  With ``--validate``, only re-checks existing output files.
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        description="Curate Spider questions into enriched train/eval JSON files."
    )
    parser.add_argument(
        "--db-list",
        type=Path,
        default=Path("data/questions/db_list.json"),
        help="Path to JSON list of Spider database IDs.",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("data/databases"),
        help="Directory where SQLite files will be stored.",
    )
    parser.add_argument(
        "--validate",
        action="store_true",
        help="Validate existing output files instead of running full curation.",
    )
    parser.add_argument(
        "--train-output",
        type=Path,
        default=Path("data/questions/questions_train.json"),
        help="Output path for curated train questions.",
    )
    parser.add_argument(
        "--eval-output",
        type=Path,
        default=Path("data/questions/questions_eval.json"),
        help="Output path for curated eval questions.",
    )

    args = parser.parse_args()

    # --- Validation-only mode: check existing outputs and exit. ---
    if args.validate:
        try:
            train_questions = _load_output_questions(args.train_output)
            eval_questions = _load_output_questions(args.eval_output)
        except ValueError as exc:
            print(f"ERROR: {exc}")
            raise SystemExit(1) from exc

        questions = train_questions + eval_questions

        db_ids = sorted(
            {str(record.get("database_name", "")).strip() for record in questions}
        )
        try:
            for db_id in db_ids:
                _validate_db_id(db_id)
        except ValueError as exc:
            print(f"ERROR: {exc}")
            raise SystemExit(1) from exc

        # Expected on-disk layout: <output-dir>/<db_id>/<db_id>.sqlite
        db_paths = {
            db_id: args.output_dir / db_id / f"{db_id}.sqlite"
            for db_id in db_ids
            if db_id
        }
        errors = validate_dataset(questions=questions, db_paths=db_paths)
        if errors:
            for error in errors:
                print(f"ERROR: {error}")
            raise SystemExit(1)

        print(f"Validation passed for {len(questions)} curated records")
        raise SystemExit(0)

    # --- Full curation: fetch databases and questions. ---
    db_ids = _load_db_list(args.db_list)
    db_paths = download_spider_databases(db_ids=db_ids, output_dir=args.output_dir)
    raw_questions = load_spider_questions(db_ids)

    # Enrich each raw Spider question; skip anything incomplete or failing.
    enriched_questions: list[dict[str, Any]] = []
    skipped_count = 0
    for raw_question in raw_questions:
        db_id = str(raw_question.get("db_id", "")).strip()
        if db_id not in db_paths:
            skipped_count += 1
            continue

        gold_sql = str(raw_question.get("query", "")).strip()
        question_text = str(raw_question.get("question", "")).strip()
        if not gold_sql or not question_text:
            skipped_count += 1
            continue

        try:
            gold_answer = compute_gold_answer(
                gold_sql=gold_sql,
                db_path=db_paths[db_id],
            )
        except sqlite3.Error as exc:
            LOGGER.warning(
                "Skipping question for database '%s' due to SQL execution failure: %s",
                db_id,
                exc,
            )
            skipped_count += 1
            continue

        tables_involved = extract_tables_involved(gold_sql)
        if not tables_involved:
            LOGGER.warning(
                "Skipping question for database '%s' because no tables were extracted",
                db_id,
            )
            skipped_count += 1
            continue

        enriched_questions.append(
            {
                "question_text": question_text,
                "database_name": db_id,
                "gold_sql": gold_sql,
                "gold_answer": gold_answer,
                "answer_type": classify_answer_type(gold_answer),
                "difficulty": classify_difficulty(tables_involved),
                "tables_involved": tables_involved,
                "spider_split": raw_question.get("spider_split", "train"),
            }
        )

    # Deterministic ordering before split/id assignment keeps outputs stable.
    split_questions = assign_splits(_sort_enriched_questions(enriched_questions))
    final_questions = _assign_question_ids(split_questions)

    validation_errors = validate_dataset(questions=final_questions, db_paths=db_paths)
    if validation_errors:
        for error in validation_errors:
            print(f"ERROR: {error}")
        raise SystemExit(1)

    # Partition into output files, dropping internal spider_split metadata.
    train_questions: list[dict[str, Any]] = []
    eval_questions: list[dict[str, Any]] = []
    for record in final_questions:
        output_record = {
            key: value for key, value in record.items() if key != "spider_split"
        }
        if output_record["split"] == TRAIN_SPLIT:
            train_questions.append(output_record)
        else:
            eval_questions.append(output_record)

    _write_output(args.train_output, train_questions)
    _write_output(args.eval_output, eval_questions)

    print(f"Prepared {len(db_paths)} databases in {args.output_dir}")
    print(f"Loaded {len(raw_questions)} Spider questions")
    print(f"Curated {len(final_questions)} questions (skipped {skipped_count})")
    print("Validation passed")
    print(f"Wrote {len(train_questions)} train records to {args.train_output}")
    print(f"Wrote {len(eval_questions)} eval records to {args.eval_output}")


if __name__ == "__main__":
    main()
scripts/download_spider_data.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Script to download Spider dataset questions for specific databases.
3
+
4
+ Usage:
5
+ python download_spider_data.py --db-id student_assessment
6
+ python download_spider_data.py --db-id student_assessment --split validation
7
+ python download_spider_data.py --db-id all # downloads all db_ids
8
+ """
9
+
10
+ import json
11
+ import argparse
12
+ from pathlib import Path
13
+ from datasets import load_dataset
14
+
15
+
16
def download_spider_questions(
    db_id: str = "student_assessment",
    split: str = "train",
    output_dir: str = "data/questions",
) -> None:
    """Download Spider dataset questions for specified database(s).

    Args:
        db_id: Database ID to filter by, or "all" to get all databases
        split: Dataset split ("train" or "validation")
        output_dir: Directory to save JSON files
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    print(f"Loading Spider dataset ({split} split)...")
    dataset = load_dataset("xlangai/spider", split=split)

    if db_id.lower() == "all":
        # Bucket every record by its db_id.
        grouped = {}
        for record in dataset:
            grouped.setdefault(record.get("db_id"), []).append(record)

        total_questions = 0
        for name, questions in grouped.items():
            filepath = target_dir / f"{name}.json"
            with open(filepath, "w") as f:
                json.dump(questions, f, indent=2)
            print(f" {name}: {len(questions)} questions → {filepath}")
            total_questions += len(questions)

        print(f"\nTotal: {total_questions} questions across {len(grouped)} databases")
        return

    # Keep only the records for the requested database.
    matching = [record for record in dataset if record.get("db_id") == db_id]
    if not matching:
        print(f"No questions found for db_id='{db_id}'")
        return

    filepath = target_dir / f"{db_id}.json"
    with open(filepath, "w") as f:
        json.dump(matching, f, indent=2)

    print(f"Found {len(matching)} questions for db_id='{db_id}'")
    print(f"Saved to {filepath}")

    # Show the first record (minus any bulky "evidence" field) as a sample.
    sample = matching[0]
    print("\nFirst question sample:")
    print(json.dumps({k: v for k, v in sample.items() if k != "evidence"}, indent=2))
76
+
77
+
78
# CLI wrapper: parse flags and delegate to download_spider_questions().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Download Spider dataset questions for specific databases",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--db-id",
        type=str,
        default="student_assessment",
        help="Database ID to filter by (or 'all' for all databases)",
    )
    parser.add_argument(
        "--split",
        type=str,
        default="train",
        choices=["train", "validation"],
        help="Dataset split to download",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="data/questions",
        help="Directory to save JSON files",
    )

    args = parser.parse_args()
    download_spider_questions(
        db_id=args.db_id, split=args.split, output_dir=args.output_dir
    )
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Download Spider SQLite databases used by SQLEnv.
2
+
3
+ Uses the same download logic as curate_questions.py: tries GitHub raw URLs
4
+ first, then falls back to the official Google Drive Spider archive.
5
+
6
+ Examples
7
+ --------
8
+ Download the default database (student_assessment):
9
+ uv run python scripts/download_spider_databases.py
10
+
11
+ Download a specific database:
12
+ uv run python scripts/download_spider_databases.py --db-id concert_singer
13
+
14
+ Download all databases referenced in db_list.json:
15
+ uv run python scripts/download_spider_databases.py --db-id all
16
+
17
+ Force re-download:
18
+ uv run python scripts/download_spider_databases.py --force
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import io
25
+ import json
26
+ import re
27
+ import time
28
+ import zipfile
29
+ from pathlib import Path
30
+ from urllib.error import HTTPError, URLError
31
+ from urllib.request import Request, urlopen
32
+
33
+ SPIDER_RAW_SQLITE_URLS = (
34
+ "https://raw.githubusercontent.com/taoyds/spider/master/database/{db_id}/{db_id}.sqlite",
35
+ "https://github.com/taoyds/spider/raw/master/database/{db_id}/{db_id}.sqlite",
36
+ )
37
+ SPIDER_ARCHIVE_DRIVE_ID = "1403EGqzIDoHMdQF4c9Bkyl7dZLZ5Wt6J"
38
+ SQLITE_MAGIC = b"SQLite format 3\x00"
39
+ DB_LIST_PATH = Path("data/questions/db_list.json")
40
+
41
+
42
+ def _validate_db_id(db_id: str) -> str:
43
+ normalized = db_id.strip()
44
+ if not normalized:
45
+ raise ValueError("db_id cannot be empty")
46
+ if not re.fullmatch(r"[A-Za-z0-9_]+", normalized):
47
+ raise ValueError(
48
+ "Invalid db_id — only letters, numbers, and underscores allowed."
49
+ )
50
+ return normalized
51
+
52
+
53
def _is_valid_sqlite(path: Path) -> bool:
    """Report whether *path* exists and starts with the SQLite magic header."""
    if not path.exists():
        return False
    if path.stat().st_size < 16:
        # Too small to even hold the 16-byte header.
        return False
    with path.open("rb") as handle:
        header = handle.read(16)
    return header == SQLITE_MAGIC
58
+
59
+
60
+ def _safe_sqlite_path(output_dir: Path, db_id: str) -> Path:
61
+ sqlite_path = output_dir / db_id / f"{db_id}.sqlite"
62
+ output_root = output_dir.resolve()
63
+ resolved = sqlite_path.resolve()
64
+ if output_root not in resolved.parents:
65
+ raise ValueError(f"Resolved path escapes output directory: {resolved}")
66
+ return sqlite_path
67
+
68
+
69
def _try_raw_download(db_id: str, destination: Path) -> bool:
    """Try downloading from GitHub raw URLs. Returns True on success."""
    for template in SPIDER_RAW_SQLITE_URLS:
        url = template.format(db_id=db_id)
        try:
            request = Request(url, headers={"User-Agent": "sqlenv/1.0"})
            with urlopen(request, timeout=30) as response:
                payload = response.read()
            if not payload.startswith(SQLITE_MAGIC):
                # Not a SQLite file (e.g. an HTML error page) — try next URL.
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            # Write to a temp file, then rename for an atomic replace.
            tmp_path = destination.with_suffix(".tmp")
            tmp_path.write_bytes(payload)
            tmp_path.replace(destination)
            return True
        except (HTTPError, URLError, OSError):
            continue
    return False
87
+
88
+
89
def _download_drive_archive() -> bytes:
    """Download official Spider archive from Google Drive.

    Handles Drive's large-file virus-scan interstitial by extracting the
    confirm token and retrying against the usercontent download endpoint.
    Makes up to two attempts with a short pause between them.

    Returns:
        The raw zip archive bytes.

    Raises:
        RuntimeError: If no attempt yields a zip payload.
    """
    drive_url = (
        f"https://drive.google.com/uc?export=download&id={SPIDER_ARCHIVE_DRIVE_ID}"
    )
    req = Request(drive_url, headers={"User-Agent": "sqlenv/1.0"})

    for attempt in range(2):
        try:
            with urlopen(req, timeout=120) as resp:
                payload = resp.read()

            # Zip files start with the "PK" local-file-header signature.
            if payload.startswith(b"PK"):
                return payload

            # Google Drive virus-scan warning page — parse confirm token
            text = payload.decode("utf-8", errors="replace")
            confirm_match = re.search(r'name="confirm" value="([^"]+)"', text)
            if confirm_match:
                confirm_url = (
                    "https://drive.usercontent.google.com/download"
                    f"?id={SPIDER_ARCHIVE_DRIVE_ID}"
                    f"&export=download&confirm={confirm_match.group(1)}"
                )
                confirm_req = Request(
                    confirm_url,
                    headers={"User-Agent": "sqlenv/1.0"},
                )
                # Longer timeout: the confirmed URL streams the full archive.
                with urlopen(confirm_req, timeout=240) as resp2:
                    payload = resp2.read()
                if payload.startswith(b"PK"):
                    return payload

            raise RuntimeError("Drive response was not a zip file")
        except (HTTPError, URLError, OSError, RuntimeError):
            # Brief backoff before the second (final) attempt.
            if attempt == 0:
                time.sleep(3)

    raise RuntimeError(
        "Failed to download Spider archive from Google Drive after retries"
    )
+ )
130
+
131
+
132
def _extract_from_archive(archive_bytes: bytes, db_id: str, destination: Path) -> None:
    """Extract a single database from the Spider zip archive.

    Tries the known top-level archive layouts in order and writes the
    first member that carries the SQLite magic header.

    Raises:
        FileNotFoundError: If no candidate member exists in the archive.
    """
    member_candidates = (
        f"spider_data/database/{db_id}/{db_id}.sqlite",
        f"spider/database/{db_id}/{db_id}.sqlite",
        f"spider-master/database/{db_id}/{db_id}.sqlite",
    )
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        for member in member_candidates:
            try:
                payload = archive.read(member)
            except KeyError:
                # Member not present under this layout — try the next one.
                continue
            if not payload.startswith(SQLITE_MAGIC):
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            # Write via temp file, then atomic rename.
            tmp_path = destination.with_suffix(".tmp")
            tmp_path.write_bytes(payload)
            tmp_path.replace(destination)
            return
    raise FileNotFoundError(f"Database '{db_id}' not found in Spider archive")
152
+
153
+
154
def _extract_all_from_archive(
    archive_bytes: bytes, output_dir: Path, force: bool
) -> int:
    """Extract all databases from the Spider archive.

    Returns:
        The number of database files written.
    """
    extracted = 0
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        for member in archive.namelist():
            # Only .sqlite files below a database/ directory qualify.
            if not member.endswith(".sqlite"):
                continue
            if "/database/" not in member:
                continue
            db_name = Path(member).stem
            target = output_dir / db_name / f"{db_name}.sqlite"
            if target.exists() and not force:
                continue
            payload = archive.read(member)
            if not payload.startswith(SQLITE_MAGIC):
                continue
            target.parent.mkdir(parents=True, exist_ok=True)
            # Temp-file write plus rename keeps the target atomic.
            tmp_path = target.with_suffix(".tmp")
            tmp_path.write_bytes(payload)
            tmp_path.replace(target)
            extracted += 1
    return extracted
178
+
179
+
180
def download_database(db_id: str, output_dir: Path, force: bool = False) -> Path:
    """Download one Spider database, with Google Drive fallback.

    Returns:
        Path to the local SQLite file.
    """
    normalized = _validate_db_id(db_id)
    sqlite_path = _safe_sqlite_path(output_dir, normalized)

    # Skip the download when a valid copy is already on disk.
    if _is_valid_sqlite(sqlite_path) and not force:
        print(f"Already exists: {sqlite_path}")
        return sqlite_path

    print(f"Downloading {normalized}...")

    if _try_raw_download(normalized, sqlite_path):
        print(f" -> {sqlite_path} (from GitHub)")
        return sqlite_path

    # Fall back to the full Drive archive when raw URLs fail.
    print(" GitHub raw URLs failed, trying Google Drive archive...")
    archive_bytes = _download_drive_archive()
    _extract_from_archive(archive_bytes, normalized, sqlite_path)
    print(f" -> {sqlite_path} (from Drive archive)")
    return sqlite_path
200
+
201
+
202
def download_all(output_dir: Path, force: bool = False) -> int:
    """Download all databases from Google Drive archive.

    Returns:
        The number of databases extracted.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    print("Downloading Spider archive from Google Drive...")
    archive_bytes = _download_drive_archive()
    extracted = _extract_all_from_archive(archive_bytes, output_dir, force)
    print(f"Extracted {extracted} database(s) to {output_dir}")
    return extracted
210
+
211
+
212
def download_listed(output_dir: Path, force: bool = False) -> int:
    """Download databases listed in db_list.json.

    Strategy: try the fast per-file GitHub raw download for each listed
    database first, then fetch the Drive archive once and extract any
    databases that failed.

    Args:
        output_dir: Directory receiving ``<db>/<db>.sqlite`` files.
        force: Re-download databases that already exist locally.

    Returns:
        The number of valid databases present on disk afterwards.

    Raises:
        FileNotFoundError: If db_list.json does not exist.
    """
    if not DB_LIST_PATH.exists():
        raise FileNotFoundError(
            f"{DB_LIST_PATH} not found — run curate_questions.py first "
            "or use --db-id <name> to download individual databases"
        )
    db_ids = json.loads(DB_LIST_PATH.read_text())
    print(f"Downloading {len(db_ids)} databases from db_list.json...")

    # Try GitHub raw first, batch fallback to archive for failures
    remaining = []
    for db_id in db_ids:
        normalized = _validate_db_id(db_id)
        sqlite_path = _safe_sqlite_path(output_dir, normalized)
        if _is_valid_sqlite(sqlite_path) and not force:
            print(f" Already exists: {normalized}")
            continue
        if _try_raw_download(normalized, sqlite_path):
            print(f" Downloaded: {normalized} (GitHub)")
        else:
            remaining.append(normalized)

    if remaining:
        # Single archive download covers every database GitHub could not serve.
        print(
            f" {len(remaining)} failed from GitHub, falling back to Drive archive..."
        )
        archive_bytes = _download_drive_archive()
        for db_id in remaining:
            sqlite_path = _safe_sqlite_path(output_dir, db_id)
            try:
                _extract_from_archive(archive_bytes, db_id, sqlite_path)
                print(f" Downloaded: {db_id} (Drive archive)")
            except FileNotFoundError:
                # Best effort: report and continue with the other databases.
                print(f" FAILED: {db_id} not found in archive")

    # Recount from disk so the summary reflects actual valid files.
    downloaded = sum(
        1
        for db_id in db_ids
        if _is_valid_sqlite(output_dir / db_id / f"{db_id}.sqlite")
    )
    print(f"Ready: {downloaded}/{len(db_ids)} databases in {output_dir}")
    return downloaded
255
+
256
+
257
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the Spider database download script."""
    parser = argparse.ArgumentParser(
        description="Download Spider SQLite databases for SQLEnv",
    )
    db_id_help = (
        "Spider database ID to download. "
        "Use 'all' for every Spider DB, or omit to download "
        "databases listed in data/questions/db_list.json"
    )
    parser.add_argument("--db-id", type=str, default=None, help=db_id_help)
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("data/databases"),
        help="Directory to store databases (default: data/databases)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Overwrite existing files",
    )
    return parser.parse_args()
283
+
284
+
285
def main() -> None:
    """Dispatch to the requested download mode based on --db-id."""
    args = parse_args()

    # No --db-id: pull everything listed in db_list.json.
    if args.db_id is None:
        download_listed(output_dir=args.output_dir, force=args.force)
        return

    # --db-id all: extract every database from the Drive archive.
    if args.db_id.lower() == "all":
        download_all(output_dir=args.output_dir, force=args.force)
        return

    # Otherwise fetch just the one named database.
    download_database(
        db_id=args.db_id,
        output_dir=args.output_dir,
        force=args.force,
    )


if __name__ == "__main__":
    main()
scripts/generate_models_from_schema.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Script to download Spider schema and auto-generate SQLAlchemy models.
3
+
4
+ The spider-schema dataset contains detailed database schemas including
5
+ table names, column names, types, and relationships. This script
6
+ downloads the schema and generates SQLAlchemy ORM models.
7
+
8
+ Usage:
9
+ # Generate models for student_assessment database
10
+ python generate_models_from_schema.py --db-id student_assessment
11
+
12
+ # Generate for multiple databases
13
+ python generate_models_from_schema.py --db-id all --output-dir models/
14
+
15
+ # Load from validation split
16
+ python generate_models_from_schema.py --db-id student_assessment --split validation
17
+ """
18
+
19
+ import json
20
+ import argparse
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List, Optional
23
+ from datasets import load_dataset
24
+
25
+
26
+ # Type mapping from Spider schema to SQLAlchemy
27
+ SQLALCHEMY_TYPE_MAP = {
28
+ "number": "Integer",
29
+ "int": "Integer",
30
+ "float": "Float",
31
+ "text": "String",
32
+ "string": "String",
33
+ "varchar": "String",
34
+ "char": "String",
35
+ "date": "Date",
36
+ "datetime": "DateTime",
37
+ "timestamp": "DateTime",
38
+ "time": "DateTime",
39
+ "boolean": "Boolean",
40
+ "bool": "Boolean",
41
+ }
42
+
43
+
44
+ def get_sqlalchemy_type(col_type: str) -> str:
45
+ """Convert Spider schema type to SQLAlchemy type."""
46
+ col_type_lower = col_type.lower().strip()
47
+
48
+ # Exact match
49
+ if col_type_lower in SQLALCHEMY_TYPE_MAP:
50
+ return SQLALCHEMY_TYPE_MAP[col_type_lower]
51
+
52
+ # Substring match (e.g., "varchar(255)" -> "String")
53
+ for key, sa_type in SQLALCHEMY_TYPE_MAP.items():
54
+ if key in col_type_lower:
55
+ return sa_type
56
+
57
+ # Default to String
58
+ return "String"
59
+
60
+
61
+ def generate_model_code(
62
+ db_id: str,
63
+ tables: List[Dict[str, Any]],
64
+ schema: Dict[str, Any],
65
+ ) -> str:
66
+ """Generate SQLAlchemy model code from schema.
67
+
68
+ Args:
69
+ db_id: Database ID
70
+ tables: List of table schemas
71
+ schema: Full schema dictionary with relationships
72
+
73
+ Returns:
74
+ Generated Python code as string
75
+ """
76
+ lines = [
77
+ f'"""',
78
+ f"SQLAlchemy ORM models for '{db_id}' database.",
79
+ f'",
80
+ f"Auto-generated from Spider schema dataset.",
81
+ f'"""',
82
+ f"",
83
+ f"from datetime import datetime",
84
+ f"from sqlalchemy import Column, Integer, String, Float, Date, DateTime, Boolean, ForeignKey",
85
+ f"from sqlalchemy.ext.declarative import declarative_base",
86
+ f"from sqlalchemy.orm import relationship",
87
+ f"",
88
+ f"Base = declarative_base()",
89
+ f"",
90
+ ]
91
+
92
+ # Generate model for each table
93
+ table_names = [t["name"] for t in tables]
94
+
95
+ for table in tables:
96
+ table_name = table["name"]
97
+ class_name = "".join(word.capitalize() for word in table_name.split("_"))
98
+
99
+ lines.append(f'class {class_name}(Base):')
100
+ lines.append(f' """Model for {table_name} table."""')
101
+ lines.append(f' __tablename__ = "{table_name}"')
102
+ lines.append(f"")
103
+
104
+ # Add columns
105
+ columns = table.get("columns", [])
106
+ for col in columns:
107
+ col_name = col["name"]
108
+ col_type = col.get("type", "text")
109
+ sa_type = get_sqlalchemy_type(col_type)
110
+
111
+ # Determine if primary key
112
+ is_pk = col.get("is_primary_key", False)
113
+
114
+ # Determine if foreign key
115
+ fk_str = ""
116
+ for fk in schema.get("foreign_keys", []):
117
+ if fk[0] == (table_names.index(table_name), columns.index(col)):
118
+ source_table_idx, target_table_idx = fk
119
+ target_col_idx = fk[2] if len(fk) > 2 else 0
120
+ target_table = table_names[target_table_idx]
121
+ target_col = tables[target_table_idx]["columns"][target_col_idx]["name"]
122
+ fk_str = f', ForeignKey("{target_table}.{target_col}")'
123
+
124
+ # Default nullable to False for primary keys
125
+ nullable = "False" if is_pk else "True"
126
+ pk_str = ", primary_key=True" if is_pk else ""
127
+
128
+ lines.append(
129
+ f' {col_name} = Column({sa_type}({col_type.split("(")[1].rstrip(")")} '
130
+ f'if "{sa_type}" == "String" else ""){pk_str}{fk_str}, nullable={nullable})'
131
+ )
132
+
133
+ lines.append(f"")
134
+
135
+ return "\n".join(lines)
136
+
137
+
138
+ def download_schema_and_generate_models(
139
+ db_id: str = "student_assessment",
140
+ split: str = "train",
141
+ output_dir: str = "data/models",
142
+ ) -> None:
143
+ """Download Spider schema and generate SQLAlchemy models.
144
+
145
+ Args:
146
+ db_id: Database ID to download schema for
147
+ split: Dataset split ("train" or "validation")
148
+ output_dir: Directory to save generated model files
149
+ """
150
+ output_path = Path(output_dir)
151
+ output_path.mkdir(parents=True, exist_ok=True)
152
+
153
+ print(f"Loading Spider schema dataset ({split} split)...")
154
+ dataset = load_dataset("richardr1126/spider-schema", split=split)
155
+
156
+ if db_id.lower() == "all":
157
+ # Generate models for all databases
158
+ processed = set()
159
+ for item in dataset:
160
+ current_db_id = item.get("db_id")
161
+ if current_db_id in processed:
162
+ continue
163
+ processed.add(current_db_id)
164
+
165
+ tables = item.get("table", [])
166
+ schema = {
167
+ "table_names": [t["name"] for t in tables],
168
+ "column_names": [col for t in tables for col in t.get("columns", [])],
169
+ "foreign_keys": item.get("foreign_keys", []),
170
+ }
171
+
172
+ # Generate code (simplified)
173
+ code = generate_simplified_models(current_db_id, tables)
174
+
175
+ filepath = output_path / f"{current_db_id}.py"
176
+ with open(filepath, "w") as f:
177
+ f.write(code)
178
+
179
+ print(f" {current_db_id}: {len(tables)} tables → {filepath}")
180
+ else:
181
+ # Filter for specific db_id
182
+ matching = [item for item in dataset if item.get("db_id") == db_id]
183
+
184
+ if not matching:
185
+ print(f"No schema found for db_id='{db_id}'")
186
+ return
187
+
188
+ item = matching[0]
189
+ tables = item.get("table", [])
190
+
191
+ # Generate simplified model code
192
+ code = generate_simplified_models(db_id, tables)
193
+
194
+ filepath = output_path / f"{db_id}.py"
195
+ with open(filepath, "w") as f:
196
+ f.write(code)
197
+
198
+ print(f"Found schema for db_id='{db_id}' with {len(tables)} tables")
199
+ print(f"Generated models → {filepath}")
200
+ print(f"\nTables: {', '.join(t['name'] for t in tables)}")
201
+
202
+
203
+ def generate_simplified_models(db_id: str, tables: List[Dict[str, Any]]) -> str:
204
+ """Generate SQLAlchemy models from table schema (simplified version).
205
+
206
+ Args:
207
+ db_id: Database ID
208
+ tables: List of table definitions from schema
209
+
210
+ Returns:
211
+ Generated Python code
212
+ """
213
+ lines = [
214
+ f'"""',
215
+ f"SQLAlchemy ORM models for '{db_id}' database.",
216
+ f'",
217
+ f"Auto-generated from Spider schema dataset.",
218
+ f'"""',
219
+ f"",
220
+ f"from datetime import datetime",
221
+ f"from sqlalchemy import Column, Integer, String, Float, Date, DateTime, Boolean, ForeignKey",
222
+ f"from sqlalchemy.ext.declarative import declarative_base",
223
+ f"from sqlalchemy.orm import relationship",
224
+ f"",
225
+ f"Base = declarative_base()",
226
+ f"",
227
+ ]
228
+
229
+ for table in tables:
230
+ table_name = table.get("name", "Unknown")
231
+ class_name = "".join(word.capitalize() for word in table_name.split("_"))
232
+
233
+ lines.append(f"")
234
+ lines.append(f"class {class_name}(Base):")
235
+ lines.append(f' """Model for {table_name} table."""')
236
+ lines.append(f' __tablename__ = "{table_name}"')
237
+ lines.append(f"")
238
+
239
+ # Add columns
240
+ columns = table.get("columns", [])
241
+ if columns:
242
+ for col in columns:
243
+ col_name = col.get("name", "unknown")
244
+ col_type = col.get("type", "text")
245
+ sa_type = get_sqlalchemy_type(col_type)
246
+
247
+ # Determine string length from type if specified
248
+ length_spec = ""
249
+ if sa_type == "String":
250
+ if "(" in col_type and ")" in col_type:
251
+ length = col_type.split("(")[1].split(")")[0]
252
+ if length.isdigit():
253
+ length_spec = f"({length})"
254
+ else:
255
+ length_spec = "(255)" # default
256
+
257
+ lines.append(f' {col_name} = Column({sa_type}{length_spec}, nullable=True)')
258
+ else:
259
+ lines.append(f" id = Column(Integer, primary_key=True)")
260
+
261
+ lines.append(f"")
262
+
263
+ return "\n".join(lines)
264
+
265
+
266
+ if __name__ == "__main__":
267
+ parser = argparse.ArgumentParser(
268
+ description="Download Spider schema and generate SQLAlchemy models",
269
+ formatter_class=argparse.RawDescriptionHelpFormatter,
270
+ )
271
+ parser.add_argument(
272
+ "--db-id",
273
+ type=str,
274
+ default="student_assessment",
275
+ help="Database ID to generate models for (or 'all' for all databases)",
276
+ )
277
+ parser.add_argument(
278
+ "--split",
279
+ type=str,
280
+ default="train",
281
+ choices=["train", "validation"],
282
+ help="Schema dataset split to use",
283
+ )
284
+ parser.add_argument(
285
+ "--output-dir",
286
+ type=str,
287
+ default="data/models",
288
+ help="Directory to save generated model files",
289
+ )
290
+
291
+ args = parser.parse_args()
292
+ download_schema_and_generate_models(
293
+ db_id=args.db_id, split=args.split, output_dir=args.output_dir
294
+ )
server/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """SQLEnv server components."""
2
+
3
+ from .sql_environment import SQLEnvironment
4
+
5
+ __all__ = ["SQLEnvironment"]
server/app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI application for the SQLEnv environment.
3
+
4
+ Exposes the SQLEnvironment over HTTP and WebSocket endpoints,
5
+ compatible with the OpenEnv EnvClient.
6
+
7
+ Usage:
8
+ # Development (with auto-reload):
9
+ uv run uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
10
+
11
+ # Via uv:
12
+ uv run server
13
+ """
14
+
15
+ import os
16
+ from pathlib import Path
17
+
18
+ # Load environment variables from .env file
19
+ try:
20
+ from dotenv import load_dotenv
21
+
22
+ env_file = Path(__file__).parent.parent / ".env"
23
+ if env_file.exists():
24
+ load_dotenv(env_file)
25
+ except ImportError:
26
+ pass # python-dotenv not installed, use system env vars
27
+
28
+ from openenv.core.env_server import create_app
29
+
30
+ try:
31
+ from sql_env.models import SQLAction, SQLObservation
32
+ from sql_env.server.sql_environment import SQLEnvironment
33
+ except ImportError:
34
+ # Fallback for Docker where PYTHONPATH=/app/env
35
+ from models import SQLAction, SQLObservation # type: ignore[no-redef]
36
+ from server.sql_environment import SQLEnvironment # type: ignore[no-redef]
37
+
38
+
39
def get_tokenizer():
    """Return a tokenizer for the environment.

    Reads TOKENIZER_NAME from the environment (default: Mistral-7B-Instruct)
    and falls back to the test MockTokenizer when transformers cannot be
    imported.
    """
    tokenizer_name = os.environ.get(
        "TOKENIZER_NAME", "mistralai/Mistral-7B-Instruct-v0.1"
    )

    try:
        # Keep from_pretrained inside the try: transformers may raise
        # ImportError for missing backends during loading as well.
        from transformers import AutoTokenizer

        loaded = AutoTokenizer.from_pretrained(tokenizer_name)
        print(f"Loaded tokenizer: {tokenizer_name}")
        return loaded
    except ImportError:
        print(
            "Warning: transformers not installed, using mock tokenizer for testing only"
        )
        from server.test_sql_env import MockTokenizer

        return MockTokenizer()
58
+
59
+
60
def create_sql_environment():
    """Factory that builds a SQLEnvironment with tokenizer and data paths.

    QUESTIONS_PATH and DB_DIR environment variables override the
    repo-relative defaults.
    """
    tokenizer = get_tokenizer()

    repo_root = Path(__file__).parent.parent
    default_questions = repo_root / "data" / "questions" / "student_assessment.json"
    default_db_dir = repo_root / "data" / "databases"

    questions_path = os.environ.get("QUESTIONS_PATH", str(default_questions))
    db_dir = os.environ.get("DB_DIR", str(default_db_dir))

    return SQLEnvironment(
        questions_path=questions_path,
        db_dir=db_dir,
        tokenizer=tokenizer,
    )
81
+
82
+
83
# Create the FastAPI app
# create_app (from openenv) wires the environment factory plus the action and
# observation models into HTTP/WebSocket endpoints compatible with EnvClient.
app = create_app(
    create_sql_environment,
    SQLAction,
    SQLObservation,
    env_name="sql_env",
)
90
+
91
+
92
def main(host: str = "0.0.0.0", port: int = 8000):
    """Entry point for running the server directly.

    Enables:
        uv run server
        python -m sql_env.server.app
    """
    # Imported lazily so importing this module never requires uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, default=8000)
    main(port=cli.parse_args().port)
server/install_deps.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Additional setup for sql_env
3
+ set -e
4
+
5
+ # Install Python dependencies
6
+ pip install --no-cache-dir -r /tmp/requirements.txt
7
+
8
+ # Set up cache directory for Hugging Face models
9
+ mkdir -p /.cache && chmod 777 /.cache
10
+
11
+ # Pre-download the GPT-2 model to avoid permission issues during runtime
12
+ python -c "from transformers import GPT2Tokenizer; GPT2Tokenizer.from_pretrained('gpt2')"
server/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0
2
+ openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git
3
+ pydantic>=2.0.0
4
+ torch==2.2.2
5
+ transformers
6
+ uvicorn>=0.24.0
server/reward.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reward helpers for SQLEnv dense shaping."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import math
7
+
8
+ try:
9
+ from sql_env.models import EpisodeContext
10
+ except ImportError: # pragma: no cover - Docker fallback import path
11
+ from models import EpisodeContext # type: ignore[no-redef]
12
+
13
+
14
+ _EXEC_OK_REWARD = 0.02
15
+ _NEW_INFO_REWARD = 0.01
16
+ _NEW_INFO_CAP = 0.10
17
+ _REPEAT_PENALTY = 0.01
18
+ _STEP_COST = 0.005
19
+ _LAYER2_CARDINALITY_WEIGHT = 0.25
20
+ _LAYER2_VALUE_OVERLAP_WEIGHT = 0.50
21
+ _LAYER2_NUMERIC_RANGE_WEIGHT = 0.25
22
+ _LAYER2_IMPROVEMENT_SCALE = 0.15
23
+ _STEP_REWARD_FLOOR = -0.2
24
+ _STEP_REWARD_CAP = 0.5
25
+
26
+
27
def compute_step_reward(
    ctx: EpisodeContext,
    action_type: str,
    sql: str,
    rows: list[tuple] | None,
    error: str | None,
) -> float:
    """Compute one dense step reward and clamp cumulative episode shaping.

    Layer 1 operational shaping always applies; Layer 2 progress shaping is
    added only for successful QUERY actions. The cumulative shaping total is
    kept within ``[-0.2, 0.5]`` and only the clamped delta for this step is
    returned.
    """
    reward = _layer1_operational(ctx, action_type, sql, rows, error)

    is_successful_query = (
        action_type.upper() == "QUERY" and rows is not None and error is None
    )
    if is_successful_query:
        reward += _layer2_progress(ctx, rows)

    previous_total = ctx.cumulative_step_reward
    new_total = max(
        _STEP_REWARD_FLOOR, min(_STEP_REWARD_CAP, previous_total + reward)
    )
    ctx.cumulative_step_reward = new_total

    return new_total - previous_total
52
+
53
+
54
def _layer1_operational(
    ctx: EpisodeContext,
    action_type: str,
    sql: str,
    rows: list[tuple] | None,
    error: str | None,
) -> float:
    """Compute Layer 1 operational reward signals.

    Components:
    - ``-0.005`` step cost on every call
    - ``-0.01`` repeat penalty for previously-seen QUERY SQL
    - ``+0.02`` for successful execution (``error is None``)
    - ``+0.01`` new-info bonus for a first-seen successful QUERY,
      capped at ``0.10`` cumulative per episode
    """
    reward = -_STEP_COST

    is_query = action_type.upper() == "QUERY"
    sql_digest = (
        hashlib.sha256(sql.encode("utf-8")).hexdigest() if is_query and sql else None
    )
    seen_before = sql_digest is not None and sql_digest in ctx.query_hashes

    if seen_before:
        reward -= _REPEAT_PENALTY
    elif error is None:
        reward += _EXEC_OK_REWARD

    first_successful_query = (
        sql_digest is not None
        and not seen_before
        and error is None
        and rows is not None
    )
    if first_successful_query:
        ctx.query_hashes.add(sql_digest)
        headroom = _NEW_INFO_CAP - ctx.cumulative_new_info_reward
        if headroom > 0:
            bonus = min(_NEW_INFO_REWARD, headroom)
            ctx.cumulative_new_info_reward += bonus
            reward += bonus

    return reward
100
+
101
+
102
+ def _cardinality_score(pred_rows: list[tuple], gold_rows: list[tuple]) -> float:
103
+ """Compute row-count similarity score in [0.0, 1.0]."""
104
+
105
+ pred_count = len(pred_rows)
106
+ gold_count = len(gold_rows)
107
+ denominator = max(pred_count, gold_count, 1)
108
+ score = 1.0 - (abs(pred_count - gold_count) / denominator)
109
+ return max(0.0, min(1.0, score))
110
+
111
+
112
+ def _value_overlap_score(pred_rows: list[tuple], gold_rows: list[tuple]) -> float:
113
+ """Compute Jaccard overlap of flattened cell values as strings."""
114
+
115
+ pred_values = {str(cell) for row in pred_rows for cell in row}
116
+ gold_values = {str(cell) for row in gold_rows for cell in row}
117
+
118
+ union = pred_values | gold_values
119
+ if not union:
120
+ return 0.0
121
+
122
+ intersection = pred_values & gold_values
123
+ return len(intersection) / len(union)
124
+
125
+
126
+ def _numeric_range_score(pred_rows: list[tuple], gold_rows: list[tuple]) -> float:
127
+ """Compute log-distance proximity for numeric cell values."""
128
+
129
+ def _is_numeric(value: object) -> bool:
130
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
131
+
132
+ pred_numerics = [float(cell) for row in pred_rows for cell in row if _is_numeric(cell)]
133
+ gold_numerics = [float(cell) for row in gold_rows for cell in row if _is_numeric(cell)]
134
+
135
+ if not gold_numerics:
136
+ return 1.0
137
+ if not pred_numerics:
138
+ return 0.0
139
+
140
+ total = 0.0
141
+ for gold_value in gold_numerics:
142
+ closest_distance = min(abs(pred_value - gold_value) for pred_value in pred_numerics)
143
+ total += 1.0 / (1.0 + math.log1p(closest_distance))
144
+
145
+ return total / len(gold_numerics)
146
+
147
+
148
+ def _bin_progress(raw_score: float) -> float:
149
+ """Bin raw progress to one of {0.0, 0.25, 0.5, 0.75, 1.0}."""
150
+
151
+ clamped_score = max(0.0, min(1.0, raw_score))
152
+ if clamped_score < 0.125:
153
+ return 0.0
154
+ if clamped_score < 0.375:
155
+ return 0.25
156
+ if clamped_score < 0.625:
157
+ return 0.5
158
+ if clamped_score < 0.875:
159
+ return 0.75
160
+ return 1.0
161
+
162
+
163
def _layer2_progress(ctx: EpisodeContext, rows: list[tuple]) -> float:
    """Layer 2 progress reward; pays only for improvement over the best bin."""
    if not ctx.gold_rows:
        return 0.0

    raw_progress = (
        _LAYER2_CARDINALITY_WEIGHT * _cardinality_score(rows, ctx.gold_rows)
        + _LAYER2_VALUE_OVERLAP_WEIGHT * _value_overlap_score(rows, ctx.gold_rows)
        + _LAYER2_NUMERIC_RANGE_WEIGHT * _numeric_range_score(rows, ctx.gold_rows)
    )
    binned = _bin_progress(raw_progress)

    improvement = binned - ctx.best_progress
    if improvement <= 0:
        # Never pay twice for the same (or worse) progress level.
        return 0.0

    ctx.best_progress = binned
    return improvement * _LAYER2_IMPROVEMENT_SCALE
server/sql_environment.py ADDED
@@ -0,0 +1,635 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from pathlib import Path
4
+ import random
5
+ import re
6
+ import sqlite3
7
+ import time
8
+ import uuid
9
+
10
+ from openenv.core.env_server.interfaces import Environment, Message, ModelTokenizer, Transform
11
+
12
+ from .reward import compute_step_reward
13
+ from .verifier import verify_answer
14
+
15
+ try:
16
+ from sql_env.models import EpisodeContext, QuestionRecord, SQLAction, SQLObservation, SQLState
17
+ except ImportError:
18
+ # Fallback for Docker where PYTHONPATH=/app/env
19
+ from models import ( # type: ignore[no-redef]
20
+ EpisodeContext,
21
+ QuestionRecord,
22
+ SQLAction,
23
+ SQLObservation,
24
+ SQLState,
25
+ )
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ _TABLE_FROM_JOIN_PATTERN = re.compile(
30
+ r"\b(?:FROM|JOIN)\s+([A-Za-z_][A-Za-z0-9_]*)", re.IGNORECASE
31
+ )
32
+ _FIRST_KEYWORD_PATTERN = re.compile(r"^[\s\n\r\t]*(\w+)")
33
+
34
+
35
+ class SQLEnvironment(Environment[SQLAction, SQLObservation, SQLState]):
36
+ """SQLEnv server implementation with a structured SQL action loop."""
37
+
38
    def __init__(
        self,
        questions_path: str,
        db_dir: str,
        tokenizer: ModelTokenizer,
        step_budget: int = 15,
        transform: Transform | None = None,
    ):
        """Initialize the environment.

        Args:
            questions_path: Path to a Spider-style questions JSON file.
            db_dir: Directory containing the SQLite databases.
            tokenizer: Tokenizer exposing ``apply_chat_template``.
            step_budget: Maximum steps per episode (must be > 0).
            transform: Optional transform forwarded to the base Environment.

        Raises:
            ValueError: If the tokenizer lacks ``apply_chat_template``,
                ``step_budget`` is not positive, or the questions file is empty.
            FileNotFoundError: If the questions file or DB directory is missing.
        """
        super().__init__(transform=transform)

        # Fail fast on configuration errors before any episode starts.
        if not hasattr(tokenizer, "apply_chat_template"):
            raise ValueError("Tokenizer must have 'apply_chat_template' method")
        if step_budget <= 0:
            raise ValueError("step_budget must be a positive integer")

        questions_file = Path(questions_path)
        database_dir = Path(db_dir)
        if not questions_file.exists():
            raise FileNotFoundError(f"Questions file not found: {questions_file}")
        if not database_dir.exists() or not database_dir.is_dir():
            raise FileNotFoundError(f"Database directory not found: {database_dir}")

        self.tokenizer = tokenizer
        self.questions_path = questions_file
        self.db_dir = database_dir
        self.step_budget = step_budget
        self.questions = self._load_questions(str(questions_file))

        if not self.questions:
            raise ValueError("Questions file contains no questions")

        # Per-episode state; populated by reset().
        self._episode: EpisodeContext | None = None
        self._last_result = ""
        self._last_error = ""
        self._last_reward: float | None = None
        self._last_query_truncated = False

        self._state = SQLState()
76
+
77
+ def _extract_tables_from_sql(self, sql: str) -> list[str]:
78
+ """Extract table names from basic FROM/JOIN clauses."""
79
+ tables: list[str] = []
80
+ for match in _TABLE_FROM_JOIN_PATTERN.findall(sql):
81
+ if match not in tables:
82
+ tables.append(match)
83
+ return tables
84
+
85
    def _load_questions(self, path: str) -> list[QuestionRecord]:
        """Load Spider questions JSON into QuestionRecord instances.

        The file must be a JSON array of objects, each with non-empty
        string fields ``question``, ``db_id`` (identifier-only characters),
        and ``query`` (the gold SQL).

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the JSON is malformed or any record is invalid.
        """
        questions_path = Path(path)
        if not questions_path.exists():
            raise FileNotFoundError(f"Questions file not found: {questions_path}")

        try:
            with questions_path.open("r", encoding="utf-8") as handle:
                payload = json.load(handle)
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid questions JSON format: {questions_path}") from exc

        if not isinstance(payload, list):
            raise ValueError("Questions JSON must be an array of records")

        question_records: list[QuestionRecord] = []
        for idx, item in enumerate(payload):
            if not isinstance(item, dict):
                raise ValueError(f"Question at index {idx} must be an object")

            question_text = item.get("question")
            db_name = item.get("db_id")
            gold_sql = item.get("query")

            if not isinstance(question_text, str) or not question_text.strip():
                raise ValueError(f"Question at index {idx} missing non-empty 'question'")
            if not isinstance(db_name, str) or not db_name.strip():
                raise ValueError(f"Question at index {idx} missing non-empty 'db_id'")
            if not isinstance(gold_sql, str) or not gold_sql.strip():
                raise ValueError(f"Question at index {idx} missing non-empty 'query'")

            # Restrict db_id to identifier characters: it is later used to
            # build a filesystem path in _open_db.
            normalized_db_name = db_name.strip()
            if not re.fullmatch(r"[A-Za-z0-9_]+", normalized_db_name):
                raise ValueError(
                    f"Question at index {idx} has invalid db_id '{normalized_db_name}'"
                )

            # gold_answer is left empty here; it is computed per-episode in
            # reset() by executing the gold SQL against the live database.
            question_records.append(
                QuestionRecord(
                    question_id=f"q-{idx}",
                    question_text=question_text,
                    database_name=normalized_db_name,
                    gold_sql=gold_sql,
                    gold_answer="",
                    answer_type="string",
                    difficulty="medium",
                    tables_involved=self._extract_tables_from_sql(gold_sql),
                )
            )

        return question_records
136
+
137
+ def _open_db(self, db_name: str) -> sqlite3.Connection:
138
+ """Open a read-only SQLite connection for the requested database."""
139
+ normalized_db_name = db_name.strip()
140
+ if not re.fullmatch(r"[A-Za-z0-9_]+", normalized_db_name):
141
+ raise ValueError(f"Invalid database name: '{db_name}'")
142
+
143
+ candidates = [
144
+ (self.db_dir / normalized_db_name / f"{normalized_db_name}.sqlite").resolve(),
145
+ (self.db_dir / f"{normalized_db_name}.sqlite").resolve(),
146
+ ]
147
+
148
+ db_root = self.db_dir.resolve()
149
+ db_path = next(
150
+ (
151
+ candidate
152
+ for candidate in candidates
153
+ if candidate.exists() and db_root in candidate.parents
154
+ ),
155
+ None,
156
+ )
157
+ if db_path is None:
158
+ raise FileNotFoundError(
159
+ f"Database '{normalized_db_name}' not found in {self.db_dir}"
160
+ )
161
+
162
+ uri = f"file:{db_path}?mode=ro"
163
+ return sqlite3.connect(uri, uri=True)
164
+
165
+ def _format_gold_answer(self, rows: list[tuple]) -> str:
166
+ """Convert SQL rows into a stable string answer for episode comparison."""
167
+ if not rows:
168
+ return ""
169
+ if len(rows) == 1 and len(rows[0]) == 1:
170
+ return str(rows[0][0])
171
+ return "\n".join(" | ".join(str(value) for value in row) for row in rows)
172
+
173
    def _execute_gold_sql(
        self,
        connection: sqlite3.Connection,
        sql: str,
        timeout_s: float = 5.0,
    ) -> list[tuple]:
        """Execute gold SQL with read-only/SELECT-only timeout protections.

        Args:
            connection: Open SQLite connection (read-only by construction).
            sql: Gold SQL; must start with SELECT.
            timeout_s: Wall-clock budget enforced via a progress handler.

        Raises:
            ValueError: If the SQL is empty or not a SELECT.
            sqlite3.OperationalError: On timeout (re-raised with a clearer
                message) or any other SQLite operational failure.
        """
        sql_stripped = sql.strip()
        if not sql_stripped:
            raise ValueError("SQL query cannot be empty")

        # Keyword check only looks at the first word; the read-only
        # connection is the real enforcement against writes.
        first_keyword_match = _FIRST_KEYWORD_PATTERN.match(sql_stripped)
        first_keyword = (
            first_keyword_match.group(1).upper() if first_keyword_match else ""
        )
        if first_keyword != "SELECT":
            raise ValueError(f"Only SELECT queries are allowed. Got: {first_keyword}")

        deadline = time.monotonic() + timeout_s

        # Returning non-zero from a progress handler interrupts the query;
        # SQLite invokes it every ~1000 VM instructions.
        def _progress_callback() -> int:
            return 1 if time.monotonic() > deadline else 0

        connection.set_progress_handler(_progress_callback, 1000)
        try:
            cursor = connection.cursor()
            cursor.execute(sql_stripped)
            return cursor.fetchall()
        except sqlite3.OperationalError as exc:
            # An interrupted query surfaces as OperationalError("interrupted");
            # translate it into an explicit timeout message.
            if "interrupted" in str(exc).lower():
                raise sqlite3.OperationalError(
                    f"Query timed out after {timeout_s:.1f} seconds"
                ) from exc
            raise
        finally:
            # Always detach the handler so later queries are not throttled.
            connection.set_progress_handler(None, 0)
209
+
210
    def reset(
        self,
        *,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> SQLObservation:
        """Reset episode context and return the initial rich observation.

        Args:
            seed: Optional seed for deterministic question selection; when
                None, the module-level ``random`` is used.
            episode_id: Optional external episode ID; a UUID4 is generated
                otherwise.

        Raises:
            sqlite3.Error: If executing the gold SQL fails (the freshly
                opened connection is closed before re-raising).
        """
        del kwargs

        # Close the previous episode's connection before opening a new one.
        if self._episode is not None:
            self._episode.db_connection.close()

        chooser = random.Random(seed) if seed is not None else random
        question = chooser.choice(self.questions)
        connection = self._open_db(question.database_name)

        try:
            gold_rows = self._execute_gold_sql(connection, question.gold_sql)
        except sqlite3.Error:
            connection.close()
            raise

        # Materialize the gold answer for this episode from the live DB; the
        # loaded QuestionRecord is copied rather than mutated in place.
        gold_answer = self._format_gold_answer(gold_rows)
        question_for_episode = QuestionRecord(
            question_id=question.question_id,
            question_text=question.question_text,
            database_name=question.database_name,
            gold_sql=question.gold_sql,
            gold_answer=gold_answer,
            answer_type=question.answer_type,
            difficulty=question.difficulty,
            tables_involved=list(question.tables_involved),
        )

        resolved_episode_id = episode_id or str(uuid.uuid4())
        self._episode = EpisodeContext(
            episode_id=resolved_episode_id,
            db_connection=connection,
            question_record=question_for_episode,
            step_count=0,
            budget=self.step_budget,
            done=False,
            gold_answer=gold_answer,
            gold_rows=gold_rows,
        )

        # Reset the externally visible state and per-step scratch fields.
        self._state.episode_id = resolved_episode_id
        self._state.step_count = 0
        self._state.current_action_type = "QUERY"
        self._state.history_messages = []
        self._state.history_tokens = []

        self._last_result = ""
        self._last_error = ""
        self._last_reward = None
        self._last_query_truncated = False

        return self._build_observation()
269
+
270
+ def _get_table_names(self, connection: sqlite3.Connection) -> list[str]:
271
+ """Return user-visible table names for the active SQLite database."""
272
+ cursor = connection.cursor()
273
+ cursor.execute(
274
+ """
275
+ SELECT name
276
+ FROM sqlite_master
277
+ WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
278
+ ORDER BY name
279
+ """
280
+ )
281
+ return [str(row[0]) for row in cursor.fetchall()]
282
+
283
+ def _resolve_table_name(self, table_name: str) -> tuple[str | None, list[str]]:
284
+ """Resolve requested table name against active DB tables."""
285
+ if self._episode is None:
286
+ return None, []
287
+ available_tables = self._get_table_names(self._episode.db_connection)
288
+ lookup = {table.lower(): table for table in available_tables}
289
+ resolved = lookup.get(table_name.strip().lower())
290
+ return resolved, available_tables
291
+
292
+ def _format_rows(self, rows: list[tuple]) -> str:
293
+ """Format SQL rows as readable text."""
294
+ if not rows:
295
+ return "No rows returned."
296
+ lines = [f"{idx}. {' | '.join(str(value) for value in row)}" for idx, row in enumerate(rows, start=1)]
297
+ return "\n".join(lines)
298
+
299
+ def _execute_sql(self, sql: str, timeout_s: float = 5.0) -> list[tuple]:
300
+ """Execute SQL in sandbox: SELECT-only, single statement, timeout, truncation."""
301
+ if self._episode is None:
302
+ raise RuntimeError("No active episode. Call reset() before step().")
303
+
304
+ sql_stripped = sql.strip()
305
+ if not sql_stripped:
306
+ raise ValueError("SQL query cannot be empty")
307
+
308
+ first_keyword_match = _FIRST_KEYWORD_PATTERN.match(sql_stripped)
309
+ first_keyword = (
310
+ first_keyword_match.group(1).upper() if first_keyword_match else ""
311
+ )
312
+ if first_keyword != "SELECT":
313
+ raise ValueError(f"Only SELECT queries are allowed. Got: {first_keyword}")
314
+
315
+ single_statement_sql = sql_stripped.rstrip(";").strip()
316
+ if ";" in single_statement_sql:
317
+ raise ValueError("Only a single SELECT statement is allowed")
318
+
319
+ deadline = time.monotonic() + timeout_s
320
+
321
+ def _progress_callback() -> int:
322
+ return 1 if time.monotonic() > deadline else 0
323
+
324
+ connection = self._episode.db_connection
325
+ connection.set_progress_handler(_progress_callback, 1000)
326
+
327
+ self._last_query_truncated = False
328
+ try:
329
+ cursor = connection.cursor()
330
+ cursor.execute(sql_stripped)
331
+ rows = cursor.fetchmany(21)
332
+ if len(rows) > 20:
333
+ self._last_query_truncated = True
334
+ rows = rows[:20]
335
+ return rows
336
+ except sqlite3.OperationalError as exc:
337
+ if "interrupted" in str(exc).lower():
338
+ raise sqlite3.OperationalError(
339
+ f"Query timed out after {timeout_s:.1f} seconds"
340
+ ) from exc
341
+ raise
342
+ finally:
343
+ connection.set_progress_handler(None, 0)
344
+
345
+ def _handle_describe(self, table_name: str) -> str:
346
+ """Return table schema and row count."""
347
+ if self._episode is None:
348
+ raise RuntimeError("No active episode. Call reset() before step().")
349
+
350
+ requested = table_name.strip()
351
+ if not requested:
352
+ raise ValueError("Argument cannot be empty for DESCRIBE")
353
+
354
+ resolved_table, available_tables = self._resolve_table_name(requested)
355
+ if resolved_table is None:
356
+ available = ", ".join(available_tables) if available_tables else "none"
357
+ raise ValueError(
358
+ f"Table '{requested}' not found. Available tables: {available}"
359
+ )
360
+
361
+ safe_identifier = resolved_table.replace('"', '""')
362
+ cursor = self._episode.db_connection.cursor()
363
+ cursor.execute(f'PRAGMA table_info("{safe_identifier}")')
364
+ columns = cursor.fetchall()
365
+ if not columns:
366
+ raise ValueError(f"Table '{resolved_table}' has no visible columns")
367
+
368
+ cursor.execute(f'SELECT COUNT(*) FROM "{safe_identifier}"')
369
+ row_count = int(cursor.fetchone()[0])
370
+ self._episode.described_tables.add(resolved_table)
371
+
372
+ lines = [f"Table '{resolved_table}' columns:"]
373
+ for _, col_name, col_type, _, _, _ in columns:
374
+ normalized_type = str(col_type).strip() or "UNKNOWN"
375
+ lines.append(f"- {col_name}: {normalized_type}")
376
+ lines.append(f"Row count: {row_count}")
377
+ return "\n".join(lines)
378
+
379
+ def _handle_sample(self, table_name: str, limit: int = 5) -> str:
380
+ """Return sample rows from a table."""
381
+ if self._episode is None:
382
+ raise RuntimeError("No active episode. Call reset() before step().")
383
+
384
+ requested = table_name.strip()
385
+ if not requested:
386
+ raise ValueError("Argument cannot be empty for SAMPLE")
387
+
388
+ resolved_table, available_tables = self._resolve_table_name(requested)
389
+ if resolved_table is None:
390
+ available = ", ".join(available_tables) if available_tables else "none"
391
+ raise ValueError(
392
+ f"Table '{requested}' not found. Available tables: {available}"
393
+ )
394
+
395
+ safe_identifier = resolved_table.replace('"', '""')
396
+ bounded_limit = max(1, min(limit, 20))
397
+ rows = self._execute_sql(
398
+ f'SELECT * FROM "{safe_identifier}" LIMIT {bounded_limit}'
399
+ )
400
+ return f"Sample from '{resolved_table}':\n{self._format_rows(rows)}"
401
+
402
+ def _handle_query(self, sql: str) -> tuple[str, list[tuple]]:
403
+ """Execute query and return formatted output with raw result rows."""
404
+ sql_text = sql.strip()
405
+ if not sql_text:
406
+ raise ValueError("Argument cannot be empty for QUERY")
407
+
408
+ rows = self._execute_sql(sql_text, timeout_s=5.0)
409
+ output = self._format_rows(rows)
410
+ if self._last_query_truncated:
411
+ output = f"{output}\n... (truncated to 20 rows)"
412
+ return output, rows
413
+
414
+ def _handle_answer(self, value: str) -> tuple[bool, float]:
415
+ """Compare submitted answer against episode gold answer."""
416
+ if self._episode is None:
417
+ raise RuntimeError("No active episode. Call reset() before step().")
418
+
419
+ is_correct = verify_answer(
420
+ predicted=value,
421
+ gold=self._episode.gold_answer or "",
422
+ answer_type=self._episode.question_record.answer_type,
423
+ gold_rows=self._episode.gold_rows,
424
+ )
425
+ self._episode.done = True
426
+ return is_correct, 1.0 if is_correct else 0.0
427
+
428
    def step(
        self,
        action: SQLAction,
        *,
        timeout_s: float = 30,
        **kwargs,
    ) -> SQLObservation:
        """Dispatch one structured action and return updated observation.

        Supported action types: DESCRIBE, SAMPLE, QUERY, ANSWER. Every
        non-ANSWER action (valid or not) consumes one unit of budget;
        the episode ends when the budget reaches zero or an ANSWER is
        submitted.

        Args:
            action: Structured action with ``action_type`` and ``argument``.
            timeout_s: Ignored; per-query timeouts are enforced by
                ``_execute_sql`` internally.
            **kwargs: Ignored; accepted for interface compatibility.

        Returns:
            The observation reflecting the environment after the action.
        """
        del timeout_s
        del kwargs

        # Stepping without reset() yields an error observation, not an exception.
        if self._episode is None:
            self._last_result = ""
            self._last_error = "No active episode. Call reset() before step()."
            self._last_reward = None
            return self._build_observation()

        # Finished episodes are inert: further steps just re-emit the state.
        if self._episode.done:
            return self._build_observation()

        action_type = str(action.action_type).strip().upper()
        argument = str(action.argument)

        self._state.current_action_type = action_type or "QUERY"
        self._last_result = ""
        self._last_error = ""
        self._last_reward = None
        reward_rows: list[tuple] | None = []
        reward_sql = ""

        def _consume_invalid_step(error_text: str) -> SQLObservation:
            # Invalid actions still burn budget so agents cannot stall forever.
            self._last_error = error_text
            self._episode.step_count += 1
            self._episode.budget = max(0, self._episode.budget - 1)
            self._episode.action_log.append(f"{action_type} -> ERROR: {error_text}")
            if self._episode.budget == 0:
                self._episode.done = True
            self._last_reward = 0.0
            self._state.step_count = self._episode.step_count
            return self._build_observation()

        valid_action_types = {"DESCRIBE", "SAMPLE", "QUERY", "ANSWER"}
        if action_type not in valid_action_types:
            return _consume_invalid_step(
                f"Unknown action type '{action.action_type}'. "
                "Valid types: DESCRIBE, SAMPLE, QUERY, ANSWER"
            )

        argument_stripped = argument.strip()
        if not argument_stripped:
            return _consume_invalid_step(
                f"Argument cannot be empty for {action_type}"
            )

        try:
            if action_type == "DESCRIBE":
                self._last_result = self._handle_describe(argument_stripped)
            elif action_type == "SAMPLE":
                self._last_result = self._handle_sample(argument_stripped)
            elif action_type == "QUERY":
                reward_sql = argument_stripped
                self._last_result, reward_rows = self._handle_query(argument_stripped)
            else:
                # ANSWER terminates the episode immediately and returns
                # early, skipping the budget decrement and shaped-reward
                # logic below.
                is_correct, reward = self._handle_answer(argument_stripped)
                verdict = "correct" if is_correct else "incorrect"
                self._last_result = f"Answer submitted: {verdict}."
                self._last_reward = reward
                self._episode.step_count += 1
                self._episode.action_log.append(
                    f"ANSWER {argument_stripped} -> {verdict}"
                )
                self._state.step_count = self._episode.step_count
                return self._build_observation()

        except ValueError as exc:
            self._last_error = str(exc)
        except sqlite3.Error as exc:
            self._last_error = f"SQL error: {exc}"

        self._episode.step_count += 1
        self._episode.budget = max(0, self._episode.budget - 1)
        self._state.step_count = self._episode.step_count

        # Shaped per-step reward only while budget remains; exhaustion is
        # handled below with a fallback reward of 0.0.
        if self._episode.budget > 0:
            self._last_reward = compute_step_reward(
                ctx=self._episode,
                action_type=action_type,
                sql=reward_sql,
                rows=reward_rows,
                error=self._last_error or None,
            )

        if self._last_error:
            self._episode.action_log.append(f"{action_type} -> ERROR: {self._last_error}")
        else:
            # Log only the first line of multi-line results to keep the
            # action history compact.
            preview = self._last_result.splitlines()[0] if self._last_result else "ok"
            self._episode.action_log.append(f"{action_type} -> {preview}")

        if self._episode.budget == 0:
            self._episode.done = True
        if self._last_reward is None:
            self._last_reward = 0.0

        return self._build_observation()
532
+
533
+ def _build_observation(self) -> SQLObservation:
534
+ """Construct a rich observation from the current episode context."""
535
+ if self._episode is None:
536
+ observation = SQLObservation(
537
+ question="",
538
+ schema_info="",
539
+ result=self._last_result,
540
+ error=self._last_error,
541
+ step_count=0,
542
+ budget_remaining=0,
543
+ action_history=[],
544
+ done=False,
545
+ reward=self._last_reward,
546
+ )
547
+ else:
548
+ table_names = self._get_table_names(self._episode.db_connection)
549
+ known_tables = set(table_names)
550
+ schema_lines = ["Available tables:", *[f"- {name}" for name in table_names]]
551
+
552
+ if self._episode.described_tables:
553
+ schema_lines.append("")
554
+ schema_lines.append("Described tables:")
555
+ for table_name in sorted(self._episode.described_tables):
556
+ if table_name not in known_tables:
557
+ schema_lines.append(
558
+ f"- {table_name}: unavailable (not in active schema)"
559
+ )
560
+ continue
561
+ safe_identifier = table_name.replace('"', '""')
562
+ cursor = self._episode.db_connection.cursor()
563
+ cursor.execute(f'PRAGMA table_info("{safe_identifier}")')
564
+ columns = cursor.fetchall()
565
+ if not columns:
566
+ schema_lines.append(f"- {table_name}: no columns available")
567
+ continue
568
+ column_summary = ", ".join(
569
+ f"{str(column[1])} {str(column[2]) or 'UNKNOWN'}"
570
+ for column in columns
571
+ )
572
+ schema_lines.append(f"- {table_name}: {column_summary}")
573
+
574
+ observation = SQLObservation(
575
+ question=self._episode.question_record.question_text,
576
+ schema_info="\n".join(schema_lines),
577
+ result=self._last_result,
578
+ error=self._last_error,
579
+ step_count=self._episode.step_count,
580
+ budget_remaining=self._episode.budget,
581
+ action_history=list(self._episode.action_log),
582
+ done=self._episode.done,
583
+ reward=self._last_reward,
584
+ )
585
+
586
+ transformed = self._apply_transform(observation)
587
+ if isinstance(transformed, SQLObservation):
588
+ return transformed
589
+
590
+ return SQLObservation(
591
+ question=getattr(transformed, "question", ""),
592
+ schema_info=getattr(transformed, "schema_info", ""),
593
+ result=getattr(transformed, "result", ""),
594
+ error=getattr(transformed, "error", ""),
595
+ step_count=getattr(transformed, "step_count", 0),
596
+ budget_remaining=getattr(transformed, "budget_remaining", 0),
597
+ action_history=getattr(transformed, "action_history", []),
598
+ done=transformed.done,
599
+ reward=transformed.reward,
600
+ )
601
+
602
    @property
    def state(self) -> SQLState:
        """Exposed state metadata (episode id, step count, action/history fields)."""
        return self._state
606
+
607
+ def message_to_action(self, message: Message) -> SQLAction:
608
+ """Convert free-form messages into structured SQLAction values."""
609
+ if "role" not in message:
610
+ raise ValueError("Message must contain a 'role' key")
611
+ if "content" not in message:
612
+ raise ValueError("Message must contain a 'content' key")
613
+ if message["content"] is None:
614
+ raise ValueError("Message content cannot be None")
615
+
616
+ content = str(message["content"])
617
+ parsed = content.strip()
618
+
619
+ action_type = "QUERY"
620
+ argument = content
621
+
622
+ if message["role"].lower() == "user" and parsed:
623
+ prefix, separator, remainder = parsed.partition(" ")
624
+ normalized_prefix = prefix.upper()
625
+ if normalized_prefix in {"DESCRIBE", "SAMPLE", "QUERY", "ANSWER"}:
626
+ action_type = normalized_prefix
627
+ if separator:
628
+ argument = remainder
629
+ else:
630
+ argument = ""
631
+
632
+ self._state.current_action_type = action_type
633
+ self._state.history_messages.append(message)
634
+
635
+ return SQLAction(action_type=action_type, argument=argument)
server/synthetic/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Synthetic database generation utilities for metamorphic testing."""

from .generate import VariantResult, generate_variant, generate_variants_for_question
from .mutations import (
    MutationResult,
    TableSchema,
    detect_bridge_tables,
    duplicate_bridge_rows,
    get_table_schemas,
    inject_irrelevant_rows,
    remap_ids,
)

# Public API of the ``server.synthetic`` package, kept alphabetically sorted.
__all__ = [
    "MutationResult",
    "TableSchema",
    "VariantResult",
    "detect_bridge_tables",
    "duplicate_bridge_rows",
    "generate_variant",
    "generate_variants_for_question",
    "get_table_schemas",
    "inject_irrelevant_rows",
    "remap_ids",
]