Upload folder using huggingface_hub
Browse files
- progress.log +3 -0
- server/app.py +10 -7
- specs/F007-DEMO.md +55 -125
- specs/F007-VERIFICATION_REPORT.md +1 -1
- specs/FEATURES.json +5 -5
progress.log
CHANGED
|
@@ -29,3 +29,6 @@
|
|
| 29 |
[2026-03-28T23:14:35+0100] Max iterations: 20
|
| 30 |
[2026-03-28T23:27:11+0100] Iteration 1/20 | Step: unknown | action=legacy | reason=permission_denied
|
| 31 |
[2026-03-28T23:32:56+0100] Iteration 2/20 | Step: unknown | action=blocked | reason=Final verification gate is blocked because `uv run openenv build -t openenv-sql-env-f007-hf-submission` still fails with `No space left on device`; build-success evidence is required before F007 can transition from verifying to complete.
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
[2026-03-28T23:14:35+0100] Max iterations: 20
|
| 30 |
[2026-03-28T23:27:11+0100] Iteration 1/20 | Step: unknown | action=legacy | reason=permission_denied
|
| 31 |
[2026-03-28T23:32:56+0100] Iteration 2/20 | Step: unknown | action=blocked | reason=Final verification gate is blocked because `uv run openenv build -t openenv-sql-env-f007-hf-submission` still fails with `No space left on device`; build-success evidence is required before F007 can transition from verifying to complete.
|
| 32 |
+
[2026-03-29T09:35:27+0200] Iteration 3/20 | Step: unknown | action=legacy | reason=permission_denied
|
| 33 |
+
[2026-03-29T09:36:37+0200] Iteration 4/20 | Step: unknown | action=complete
|
| 34 |
+
[2026-03-29T09:36:37+0200] === Ralph Loop Complete === iterations=4 elapsed=33721.4s
|
server/app.py
CHANGED
|
@@ -89,7 +89,7 @@ app = create_app(
|
|
| 89 |
)
|
| 90 |
|
| 91 |
|
| 92 |
-
def main(host: str = "0.0.0.0", port: int = 8000):
|
| 93 |
"""Entry point for running the server directly.
|
| 94 |
|
| 95 |
Enables:
|
|
@@ -98,13 +98,16 @@ def main(host: str = "0.0.0.0", port: int = 8000):
|
|
| 98 |
"""
|
| 99 |
import uvicorn
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
uvicorn.run(app, host=host, port=port)
|
| 102 |
|
| 103 |
|
| 104 |
if __name__ == "__main__":
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
parser = argparse.ArgumentParser()
|
| 108 |
-
parser.add_argument("--port", type=int, default=8000)
|
| 109 |
-
args = parser.parse_args()
|
| 110 |
-
main(port=args.port)
|
|
|
|
| 89 |
)
|
| 90 |
|
| 91 |
|
| 92 |
+
def main(host: str = "0.0.0.0", port: int | None = None):
|
| 93 |
"""Entry point for running the server directly.
|
| 94 |
|
| 95 |
Enables:
|
|
|
|
| 98 |
"""
|
| 99 |
import uvicorn
|
| 100 |
|
| 101 |
+
if port is None:
|
| 102 |
+
import argparse
|
| 103 |
+
|
| 104 |
+
parser = argparse.ArgumentParser()
|
| 105 |
+
parser.add_argument("--port", type=int, default=8000)
|
| 106 |
+
args = parser.parse_args()
|
| 107 |
+
port = args.port
|
| 108 |
+
|
| 109 |
uvicorn.run(app, host=host, port=port)
|
| 110 |
|
| 111 |
|
| 112 |
if __name__ == "__main__":
|
| 113 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
specs/F007-DEMO.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# Feature Demo: F007 — HuggingFace Deployment & Submission
|
| 2 |
|
| 3 |
-
> **Generated:** 2026-03-
|
| 4 |
> **Context source:** spec + discovery only (implementation not read)
|
| 5 |
> **Feature entry:** [FEATURES.json #F007](../specs/FEATURES.json)
|
| 6 |
|
|
@@ -8,9 +8,9 @@
|
|
| 8 |
|
| 9 |
## What This Feature Does
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
From a user perspective,
|
| 14 |
|
| 15 |
---
|
| 16 |
|
|
@@ -18,49 +18,23 @@ From a user perspective, this removes submission friction: instead of piecing to
|
|
| 18 |
|
| 19 |
### Verified in This Demo Run
|
| 20 |
|
| 21 |
-
- Ran `uv run openenv validate --verbose`
|
| 22 |
-
-
|
| 23 |
-
- Ran `uv run openenv
|
| 24 |
-
- Ran
|
|
|
|
| 25 |
|
| 26 |
### Previously Verified Evidence
|
| 27 |
|
| 28 |
-
- `specs/
|
| 29 |
-
- `specs/F007-
|
| 30 |
-
- `specs/FEATURES.json` (`verification_evidence` for F007) records verification evidence: 250 passed, 1 skipped with verifier approval.
|
| 31 |
|
| 32 |
---
|
| 33 |
|
| 34 |
## What Still Needs User Verification
|
| 35 |
|
| 36 |
-
-
|
| 37 |
-
-
|
| 38 |
-
- Complete/polish and publish the final blog post content from the outline.
|
| 39 |
-
|
| 40 |
-
### Deployment Re-Run Recipe
|
| 41 |
-
|
| 42 |
-
1. Use an explicit lowercase image tag when building:
|
| 43 |
-
- `uv run openenv build -t openenv-sql-env-f007-hf-submission`
|
| 44 |
-
2. Ensure Hugging Face credentials remain configured before any re-push:
|
| 45 |
-
- `huggingface-cli login` (or equivalent token export expected by your `openenv push` setup)
|
| 46 |
-
3. Re-run deployment sequence in order:
|
| 47 |
-
- `uv run openenv validate --verbose`
|
| 48 |
-
- `uv run openenv build -t openenv-sql-env-f007-hf-submission`
|
| 49 |
-
- `uv run openenv push`
|
| 50 |
-
4. Keep the generated Hugging Face frontmatter block in `README.md` (push currently succeeds with `colorFrom: blue` and `colorTo: green`).
|
| 51 |
-
|
| 52 |
-
### Evidence Submission Format (for verifier re-run)
|
| 53 |
-
|
| 54 |
-
Append the authenticated deployment evidence directly in this file under `## Live Local Proof` using this structure:
|
| 55 |
-
|
| 56 |
-
1. `### Authenticated Build Evidence`
|
| 57 |
-
- Command: `uv run openenv build -t openenv-sql-env-f007-hf-submission`
|
| 58 |
-
- Paste raw terminal output block showing GHCR pull success and build completion.
|
| 59 |
-
2. `### Hugging Face Push Evidence`
|
| 60 |
-
- Command: `uv run openenv push`
|
| 61 |
-
- Paste raw terminal output block showing authenticated push attempt/result.
|
| 62 |
-
3. Optional but recommended: `### Deployed Space Runtime Evidence`
|
| 63 |
-
- Command(s): `curl https://<space-url>/health` and a short episode transcript.
|
| 64 |
|
| 65 |
---
|
| 66 |
|
|
@@ -69,20 +43,20 @@ Append the authenticated deployment evidence directly in this file under `## Liv
|
|
| 69 |
> Run these commands to see the feature in action:
|
| 70 |
|
| 71 |
```bash
|
| 72 |
-
uv sync
|
| 73 |
uv run openenv validate --verbose
|
| 74 |
uv run openenv build -t openenv-sql-env-f007-hf-submission
|
|
|
|
| 75 |
```
|
| 76 |
|
| 77 |
-
Prereq:
|
| 78 |
|
| 79 |
---
|
| 80 |
|
| 81 |
## Live Local Proof
|
| 82 |
|
| 83 |
-
### Validate
|
| 84 |
|
| 85 |
-
This
|
| 86 |
|
| 87 |
```bash
|
| 88 |
uv run openenv validate --verbose
|
|
@@ -101,65 +75,26 @@ Supported deployment modes:
|
|
| 101 |
[NO] python_module
|
| 102 |
```
|
| 103 |
|
| 104 |
-
What to notice: Docker mode
|
| 105 |
-
|
| 106 |
-
### Build Docker artifact (default tag behavior)
|
| 107 |
-
|
| 108 |
-
This attempts the standard build path without manual tag override.
|
| 109 |
-
|
| 110 |
-
```bash
|
| 111 |
-
uv run openenv build
|
| 112 |
-
```
|
| 113 |
-
|
| 114 |
-
```text
|
| 115 |
-
...
|
| 116 |
-
ERROR: invalid tag "openenv-sql-env-F007-huggingface-deployment-submission": repository name must be lowercase
|
| 117 |
-
|
| 118 |
-
✗ Docker build failed
|
| 119 |
-
```
|
| 120 |
-
|
| 121 |
-
What to notice: default tag generation uses mixed-case env name and fails Docker tag constraints.
|
| 122 |
-
|
| 123 |
-
### Build Docker artifact with explicit lowercase tag
|
| 124 |
-
|
| 125 |
-
This retries with a user-provided lowercase tag.
|
| 126 |
-
|
| 127 |
-
```bash
|
| 128 |
-
uv run openenv build -t openenv-sql-env-f007-hf-submission
|
| 129 |
-
```
|
| 130 |
-
|
| 131 |
-
```text
|
| 132 |
-
...
|
| 133 |
-
ERROR: failed to copy file from /root/.cache/uv/archive-v0/... to /app/env/.venv/...: No space left on device (os error 28)
|
| 134 |
-
|
| 135 |
-
✗ Docker build failed
|
| 136 |
-
```
|
| 137 |
-
|
| 138 |
-
What to notice: local tag issue is resolved and GHCR base-image pull succeeds; the current blocker is local Docker disk capacity during dependency install.
|
| 139 |
-
|
| 140 |
-
### Authenticated Build Evidence
|
| 141 |
|
| 142 |
-
|
| 143 |
|
| 144 |
```bash
|
| 145 |
uv run openenv build -t openenv-sql-env-f007-hf-submission
|
| 146 |
```
|
| 147 |
|
| 148 |
```text
|
| 149 |
-
|
| 150 |
-
#2 DONE 0.0s
|
| 151 |
-
#3 [internal] load metadata for ghcr.io/meta-pytorch/openenv-base:latest
|
| 152 |
-
#3 DONE 0.5s
|
| 153 |
...
|
| 154 |
#18 naming to docker.io/library/openenv-sql-env-f007-hf-submission done
|
| 155 |
-
#18 DONE 0.0s
|
| 156 |
-
...
|
| 157 |
✓ Docker build successful
|
|
|
|
|
|
|
| 158 |
```
|
| 159 |
|
| 160 |
-
|
| 161 |
|
| 162 |
-
|
| 163 |
|
| 164 |
```bash
|
| 165 |
uv run openenv push
|
|
@@ -174,91 +109,86 @@ Uploading files to hjerpe/sql_env...
|
|
| 174 |
Space URL: https://huggingface.co/spaces/hjerpe/sql_env
|
| 175 |
|
| 176 |
✓ Deployment complete!
|
|
|
|
| 177 |
```
|
| 178 |
|
|
|
|
|
|
|
| 179 |
---
|
| 180 |
|
| 181 |
## Existing Evidence
|
| 182 |
|
| 183 |
-
-
|
| 184 |
-
-
|
| 185 |
|
| 186 |
---
|
| 187 |
|
| 188 |
## Manual Verification Checklist
|
| 189 |
|
| 190 |
-
1.
|
| 191 |
-
2.
|
| 192 |
-
3.
|
| 193 |
-
4.
|
| 194 |
-
5.
|
| 195 |
|
| 196 |
---
|
| 197 |
|
| 198 |
## Edge Cases Exercised
|
| 199 |
|
| 200 |
-
###
|
| 201 |
-
|
| 202 |
-
```bash
|
| 203 |
-
uv run openenv build
|
| 204 |
-
```
|
| 205 |
-
|
| 206 |
-
```text
|
| 207 |
-
ERROR: invalid tag "openenv-sql-env-F007-huggingface-deployment-submission": repository name must be lowercase
|
| 208 |
-
```
|
| 209 |
-
|
| 210 |
-
This matters because build reproducibility depends on explicit lowercase tagging in this repo naming pattern.
|
| 211 |
-
|
| 212 |
-
### Build succeeds with explicit lowercase tag
|
| 213 |
|
| 214 |
```bash
|
| 215 |
-
uv run openenv
|
| 216 |
```
|
| 217 |
|
| 218 |
```text
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
| 221 |
```
|
| 222 |
|
| 223 |
-
This
|
| 224 |
|
| 225 |
-
###
|
| 226 |
|
| 227 |
```bash
|
| 228 |
-
uv run
|
| 229 |
```
|
| 230 |
|
| 231 |
```text
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
Space URL: https://huggingface.co/spaces/hjerpe/sql_env
|
| 236 |
```
|
| 237 |
|
| 238 |
-
This
|
| 239 |
|
| 240 |
-
###
|
| 241 |
|
| 242 |
```bash
|
| 243 |
-
uv run --with pytest pytest tests/ -v
|
| 244 |
```
|
| 245 |
|
| 246 |
```text
|
| 247 |
-
|
|
|
|
|
|
|
| 248 |
```
|
| 249 |
|
| 250 |
-
This
|
| 251 |
|
| 252 |
---
|
| 253 |
|
| 254 |
## Test Evidence (Optional)
|
| 255 |
|
| 256 |
> Supplementary proof that the feature works correctly across all scenarios.
|
| 257 |
-
> The Live Demo section above shows how to use this deployment path locally.
|
| 258 |
|
| 259 |
| Test Suite | Tests | Status |
|
| 260 |
|---|---|---|
|
| 261 |
-
| Full
|
|
|
|
| 262 |
|
| 263 |
---
|
| 264 |
|
|
|
|
| 1 |
# Feature Demo: F007 — HuggingFace Deployment & Submission
|
| 2 |
|
| 3 |
+
> **Generated:** 2026-03-29T07:33:23Z
|
| 4 |
> **Context source:** spec + discovery only (implementation not read)
|
| 5 |
> **Feature entry:** [FEATURES.json #F007](../specs/FEATURES.json)
|
| 6 |
|
|
|
|
| 8 |
|
| 9 |
## What This Feature Does
|
| 10 |
|
| 11 |
+
F007 packages SQLEnv so a judge can actually consume it end-to-end: discover the project from the README, run or visit the deployed Hugging Face Space, and use the training notebook workflow.
|
| 12 |
|
| 13 |
+
From a user perspective, the core value is trust and usability: deployment assets validate/build/push cleanly, and the submission package is runnable by someone outside the team.
|
| 14 |
|
| 15 |
---
|
| 16 |
|
|
|
|
| 18 |
|
| 19 |
### Verified in This Demo Run
|
| 20 |
|
| 21 |
+
- Ran deployment validation locally with `uv run openenv validate --verbose`.
|
| 22 |
+
- Built deployment image locally with `uv run openenv build -t openenv-sql-env-f007-hf-submission`.
|
| 23 |
+
- Ran authenticated deployment push with `uv run openenv push` to `https://huggingface.co/spaces/hjerpe/sql_env`.
|
| 24 |
+
- Ran notebook/training E2E checks (`tests/e2e/test_training_e2e.py`): 5 passed.
|
| 25 |
+
- Ran full regression suite: 250 passed, 1 skipped.
|
| 26 |
|
| 27 |
### Previously Verified Evidence
|
| 28 |
|
| 29 |
+
- `specs/FEATURES.json` → `verification_evidence` for F007: 250 of 250 recorded tests passed (the full suite's 1 skipped test is not counted), verifier approved.
|
| 30 |
+
- `specs/F007-IMPLEMENTATION_SPEC.md` (Section 1a) records authenticated build + push completion evidence.
|
|
|
|
| 31 |
|
| 32 |
---
|
| 33 |
|
| 34 |
## What Still Needs User Verification
|
| 35 |
|
| 36 |
+
- Open the live Space in a browser and manually run a reset/step/answer episode flow.
|
| 37 |
+
- Open `notebooks/train_grpo.ipynb` in Colab and execute cells in order on a clean runtime.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
---
|
| 40 |
|
|
|
|
| 43 |
> Run these commands to see the feature in action:
|
| 44 |
|
| 45 |
```bash
|
|
|
|
| 46 |
uv run openenv validate --verbose
|
| 47 |
uv run openenv build -t openenv-sql-env-f007-hf-submission
|
| 48 |
+
uv run openenv push
|
| 49 |
```
|
| 50 |
|
| 51 |
+
Prerequisite: an authenticated Hugging Face CLI session (or account token) with write access to the target Space.
|
| 52 |
|
| 53 |
---
|
| 54 |
|
| 55 |
## Live Local Proof
|
| 56 |
|
| 57 |
+
### Validate Deployment Configuration
|
| 58 |
|
| 59 |
+
This confirms deployment mode support and flags non-Docker modes clearly.
|
| 60 |
|
| 61 |
```bash
|
| 62 |
uv run openenv validate --verbose
|
|
|
|
| 75 |
[NO] python_module
|
| 76 |
```
|
| 77 |
|
| 78 |
+
What to notice: Docker mode (the scoped deployment surface for F007) is supported.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
+
### Build the Hugging Face Deployment Image
|
| 81 |
|
| 82 |
```bash
|
| 83 |
uv run openenv build -t openenv-sql-env-f007-hf-submission
|
| 84 |
```
|
| 85 |
|
| 86 |
```text
|
| 87 |
+
Building Docker image for: sql-env-F007-huggingface-deployment-submission
|
|
|
|
|
|
|
|
|
|
| 88 |
...
|
| 89 |
#18 naming to docker.io/library/openenv-sql-env-f007-hf-submission done
|
|
|
|
|
|
|
| 90 |
✓ Docker build successful
|
| 91 |
+
|
| 92 |
+
Done!
|
| 93 |
```
|
| 94 |
|
| 95 |
+
What to notice: image build completed successfully with the expected tag.
|
| 96 |
|
| 97 |
+
### Push to Hugging Face Space
|
| 98 |
|
| 99 |
```bash
|
| 100 |
uv run openenv push
|
|
|
|
| 109 |
Space URL: https://huggingface.co/spaces/hjerpe/sql_env
|
| 110 |
|
| 111 |
✓ Deployment complete!
|
| 112 |
+
Visit your space at: https://huggingface.co/spaces/hjerpe/sql_env
|
| 113 |
```
|
| 114 |
|
| 115 |
+
What to notice: authenticated push succeeded and produced a live Space URL.
|
| 116 |
+
|
| 117 |
---
|
| 118 |
|
| 119 |
## Existing Evidence
|
| 120 |
|
| 121 |
+
- Verification spec target command (`uv run --with pytest pytest tests/ -v`) was re-run in this demo and passed.
|
| 122 |
+
- F007 entry in `specs/FEATURES.json` already recorded verifier approval before this refresh.
|
| 123 |
|
| 124 |
---
|
| 125 |
|
| 126 |
## Manual Verification Checklist
|
| 127 |
|
| 128 |
+
1. Open `https://huggingface.co/spaces/hjerpe/sql_env`.
|
| 129 |
+
2. Confirm the app loads without startup errors.
|
| 130 |
+
3. Start an episode (reset), then run at least one exploration step.
|
| 131 |
+
4. Submit an answer action and confirm terminal response/reward appears.
|
| 132 |
+
5. Open `notebooks/train_grpo.ipynb` in Colab and run setup + connect + one training/eval pass.
|
| 133 |
|
| 134 |
---
|
| 135 |
|
| 136 |
## Edge Cases Exercised
|
| 137 |
|
| 138 |
+
### Deployment-mode boundary is explicit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
```bash
|
| 141 |
+
uv run openenv validate --verbose
|
| 142 |
```
|
| 143 |
|
| 144 |
```text
|
| 145 |
+
Supported deployment modes:
|
| 146 |
+
[YES] docker
|
| 147 |
+
[NO] openenv_serve
|
| 148 |
+
[NO] uv_run
|
| 149 |
+
[NO] python_module
|
| 150 |
```
|
| 151 |
|
| 152 |
+
This matters because F007’s release path is Docker/HF Spaces; unsupported modes are surfaced clearly.
|
| 153 |
|
| 154 |
+
### Verification-spec command drift (error case)
|
| 155 |
|
| 156 |
```bash
|
| 157 |
+
uv run --with pytest pytest tests/e2e/test_readme_completeness.py -v
|
| 158 |
```
|
| 159 |
|
| 160 |
```text
|
| 161 |
+
ERROR: file or directory not found: tests/e2e/test_readme_completeness.py
|
| 162 |
+
collected 0 items
|
| 163 |
+
============================ no tests ran in 0.00s ============================
|
|
|
|
| 164 |
```
|
| 165 |
|
| 166 |
+
This matters because it reveals a spec-to-repo mismatch that should be corrected in verification artifacts.
|
| 167 |
|
| 168 |
+
### Notebook pipeline smoke validation still passes
|
| 169 |
|
| 170 |
```bash
|
| 171 |
+
uv run --with pytest pytest tests/e2e/test_training_e2e.py -v
|
| 172 |
```
|
| 173 |
|
| 174 |
```text
|
| 175 |
+
collected 5 items
|
| 176 |
+
...
|
| 177 |
+
============================== 5 passed in 11.33s ==============================
|
| 178 |
```
|
| 179 |
|
| 180 |
+
This confirms the training notebook path still has executable smoke coverage.
|
| 181 |
|
| 182 |
---
|
| 183 |
|
| 184 |
## Test Evidence (Optional)
|
| 185 |
|
| 186 |
> Supplementary proof that the feature works correctly across all scenarios.
|
|
|
|
| 187 |
|
| 188 |
| Test Suite | Tests | Status |
|
| 189 |
|---|---|---|
|
| 190 |
+
| Full regression (`uv run --with pytest pytest tests/ -v`) | 251 collected | 250 passed, 1 skipped |
|
| 191 |
+
| Training E2E (`tests/e2e/test_training_e2e.py`) | 5 | All passed |
|
| 192 |
|
| 193 |
---
|
| 194 |
|
specs/F007-VERIFICATION_REPORT.md
CHANGED
|
@@ -38,7 +38,7 @@ Decision: **APPROVE**
|
|
| 38 |
### 3.1 Implementation Step Completion
|
| 39 |
|
| 40 |
- Section 7 implementation steps (1.1, 1.2, 1.3, 2.1, 2.2, 3.1) are all marked `OK Completed`.
|
| 41 |
-
- Section 1a
|
| 42 |
|
| 43 |
### 3.2 Test Execution
|
| 44 |
|
|
|
|
| 38 |
### 3.1 Implementation Step Completion
|
| 39 |
|
| 40 |
- Section 7 implementation steps (1.1, 1.2, 1.3, 2.1, 2.2, 3.1) are all marked `OK Completed`.
|
| 41 |
+
- Section 1a now reports **Progress 7/7** and **Current Step: Finalization Protocol (OK Completed)**.
|
| 42 |
|
| 43 |
### 3.2 Test Execution
|
| 44 |
|
specs/FEATURES.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"project": "SQLEnv - Interactive Database Query RL Environment",
|
| 4 |
"description": "OpenEnv Challenge submission: RL environment where agents learn to answer NL questions about databases through iterative SQL exploration",
|
| 5 |
"created": "2026-03-24T07:15:50Z",
|
| 6 |
-
"updated": "2026-03-29T07:
|
| 7 |
"features": [
|
| 8 |
{
|
| 9 |
"id": "F001",
|
|
@@ -11,7 +11,7 @@
|
|
| 11 |
"description": "Complete the step/reset lifecycle: remove Ollama from environment, accept structured actions (DESCRIBE table_name, SAMPLE table_name, QUERY sql_string, ANSWER value), wire up SQLite execution with sandboxing (read-only, 5s timeout, SELECT-only), load questions from JSON on reset(), enforce step budget (15 steps), handle episode termination",
|
| 12 |
"complexity": "complex",
|
| 13 |
"verification_mode": "standard",
|
| 14 |
-
"status": "
|
| 15 |
"priority": 1,
|
| 16 |
"dependencies": [],
|
| 17 |
"docs": {
|
|
@@ -566,7 +566,7 @@
|
|
| 566 |
"description": "Competition submission package: validate and push Docker to HF Spaces (openenv push), clean up GitHub repo (README, setup instructions, training notebook), write HF blog post outline (hook, problem, solution, results, technical), record/screenshot before-vs-after demo.",
|
| 567 |
"complexity": "standard",
|
| 568 |
"verification_mode": "mvp",
|
| 569 |
-
"status": "
|
| 570 |
"priority": 7,
|
| 571 |
"dependencies": [
|
| 572 |
"F001",
|
|
@@ -637,14 +637,14 @@
|
|
| 637 |
"mode": "mvp",
|
| 638 |
"tests_run": 250,
|
| 639 |
"tests_passed": 250,
|
| 640 |
-
"timestamp": "2026-03-
|
| 641 |
"command": "uv run --with pytest pytest tests/ -v",
|
| 642 |
"verifier_result": "approved"
|
| 643 |
},
|
| 644 |
"user_value": "Judges and external developers can now consume a complete SQLEnv submission package with HF Spaces-compatible deployment artifacts, a polished README quickstart, a structured blog outline, and a Colab-ready GRPO training notebook.",
|
| 645 |
"demo": {
|
| 646 |
"path": "specs/F007-DEMO.md",
|
| 647 |
-
"generated_at": "2026-03-
|
| 648 |
"mode": "infra_release",
|
| 649 |
"status": "partial",
|
| 650 |
"requires_user_verification": true,
|
|
|
|
| 3 |
"project": "SQLEnv - Interactive Database Query RL Environment",
|
| 4 |
"description": "OpenEnv Challenge submission: RL environment where agents learn to answer NL questions about databases through iterative SQL exploration",
|
| 5 |
"created": "2026-03-24T07:15:50Z",
|
| 6 |
+
"updated": "2026-03-29T07:33:23Z",
|
| 7 |
"features": [
|
| 8 |
{
|
| 9 |
"id": "F001",
|
|
|
|
| 11 |
"description": "Complete the step/reset lifecycle: remove Ollama from environment, accept structured actions (DESCRIBE table_name, SAMPLE table_name, QUERY sql_string, ANSWER value), wire up SQLite execution with sandboxing (read-only, 5s timeout, SELECT-only), load questions from JSON on reset(), enforce step budget (15 steps), handle episode termination",
|
| 12 |
"complexity": "complex",
|
| 13 |
"verification_mode": "standard",
|
| 14 |
+
"status": "verifying",
|
| 15 |
"priority": 1,
|
| 16 |
"dependencies": [],
|
| 17 |
"docs": {
|
|
|
|
| 566 |
"description": "Competition submission package: validate and push Docker to HF Spaces (openenv push), clean up GitHub repo (README, setup instructions, training notebook), write HF blog post outline (hook, problem, solution, results, technical), record/screenshot before-vs-after demo.",
|
| 567 |
"complexity": "standard",
|
| 568 |
"verification_mode": "mvp",
|
| 569 |
+
"status": "complete",
|
| 570 |
"priority": 7,
|
| 571 |
"dependencies": [
|
| 572 |
"F001",
|
|
|
|
| 637 |
"mode": "mvp",
|
| 638 |
"tests_run": 250,
|
| 639 |
"tests_passed": 250,
|
| 640 |
+
"timestamp": "2026-03-29T07:29:32Z",
|
| 641 |
"command": "uv run --with pytest pytest tests/ -v",
|
| 642 |
"verifier_result": "approved"
|
| 643 |
},
|
| 644 |
"user_value": "Judges and external developers can now consume a complete SQLEnv submission package with HF Spaces-compatible deployment artifacts, a polished README quickstart, a structured blog outline, and a Colab-ready GRPO training notebook.",
|
| 645 |
"demo": {
|
| 646 |
"path": "specs/F007-DEMO.md",
|
| 647 |
+
"generated_at": "2026-03-29T07:33:23Z",
|
| 648 |
"mode": "infra_release",
|
| 649 |
"status": "partial",
|
| 650 |
"requires_user_verification": true,
|