| """FastAPI application for the SimMart environment.""" |
|
|
| from fastapi.responses import HTMLResponse |
|
|
| try: |
| from openenv.core.env_server.http_server import create_app |
| except Exception as e: |
| raise ImportError( |
| "openenv-core is required. pip install openenv-core" |
| ) from e |
|
|
| try: |
| from ..models import SimMartAction, SimMartObservation |
| from .environment import SimMartEnvironment |
| except (ImportError, ModuleNotFoundError): |
| from models import SimMartAction, SimMartObservation |
| from server.environment import SimMartEnvironment |
|
|
# Build the server application by handing OpenEnv's ``create_app`` factory the
# environment class together with its action and observation models.
# NOTE(review): ``max_concurrent_envs`` presumably caps simultaneously live
# environment instances -- confirm against the openenv-core documentation.
app = create_app(
    SimMartEnvironment, SimMartAction, SimMartObservation,
    env_name="simmart", max_concurrent_envs=10,
)
|
|
|
|
# Base URL of the Space's file browser on the Hugging Face Hub. Every
# "Materials" link on the landing page is built by appending a path to it.
_HUB = "https://huggingface.co/spaces/Viani/SimMart/blob/main"


# Static landing page served at "/". This is an f-string whose only
# interpolated value is ``_HUB``; every literal brace (CSS rules, JSON examples)
# is therefore doubled as ``{{`` / ``}}``.
#
# Dashes are written as real Unicode characters (U+2014 em dash, U+2013 en
# dash) and the ampersand in "P&L" is escaped as ``&amp;`` so the markup is
# valid HTML.
# NOTE(review): the emoji in the <h1> was unrecoverable (mis-decoded glyph); a
# shopping cart (U+1F6D2) is used as a stand-in -- confirm the intended icon.
_INDEX_HTML = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>SimMart — OpenEnv retail-CEO simulation</title>
<style>
  body {{ font: 15px/1.55 -apple-system, system-ui, "Segoe UI", Roboto, sans-serif;
         max-width: 760px; margin: 36px auto; padding: 0 22px; color: #1a1a1a; }}
  h1 {{ font-size: 28px; margin: 0 0 4px 0; }}
  .tag {{ color: #555; font-size: 14px; margin: 0 0 22px 0; }}
  h3 {{ margin-top: 26px; font-size: 16px; }}
  p {{ margin: 10px 0; }}
  code {{ background: #f4f4f5; padding: 1px 5px; border-radius: 3px;
         font: 13px/1.4 ui-monospace, SFMono-Regular, Menlo, monospace; }}
  pre {{ background: #f4f4f5; padding: 12px 14px; border-radius: 6px;
        overflow-x: auto; font: 13px/1.5 ui-monospace, SFMono-Regular, Menlo, monospace;
        border: 1px solid #ececec; }}
  a {{ color: #c2410c; text-decoration: none; }}
  a:hover {{ text-decoration: underline; }}
  table {{ border-collapse: collapse; margin: 6px 0 14px 0; }}
  table td {{ padding: 4px 16px 4px 0; vertical-align: top; font-size: 14px; }}
  .links a {{ display: inline-block; margin: 4px 16px 4px 0; }}
  .pill {{ display: inline-block; padding: 2px 8px; border-radius: 11px;
          background: #fff7ed; color: #c2410c; font-size: 12px; font-weight: 600; }}
</style>
</head>
<body>
<h1>🛒 SimMart <span class="pill">OpenEnv</span></h1>
<p class="tag">A 1.5B model running a 30-store, 8-week tier-2 Indian retail chain.</p>

<p>An LLM CEO opens a weekly inbox of 12–18 proposals from four department agents
(Supply Chain, Store Ops, Finance, Growth). Each week the CEO emits an
<code>approve</code> / <code>reject</code> / <code>flag_suspicious</code> verdict
per proposal, plus a free-form Founder's Journal. Two of the proposals each
quarter are deliberately <em>rogue</em> — inflated POs, kickback contracts,
fictitious refunds. Reward is dense: KPI deltas (EBITDA + NPS + stockout +
cash) + rogue catch + terminal P&amp;L + journal coherence.</p>

<p>Trained with SFT then 110 GRPO steps on Qwen2.5-1.5B + LoRA. Held-out
reward <strong>+0.84</strong> — within <strong>0.37</strong> of Claude
Haiku 4.5, <strong>2× the reward of Claude Sonnet 4.6</strong>, at
1/800 the parameter count. See
<a href="{_HUB}/BLOG.md" target="_top">BLOG.md</a> for the full results.</p>

<h3>API endpoints</h3>
<table>
  <tr><td><code>POST /reset</code></td><td>Start a new episode. Body: <code>{{"seed": int}}</code></td></tr>
  <tr><td><code>POST /step</code></td><td>Take a CEO action. Body: <code>{{"env_id": str, "action": SimMartAction}}</code></td></tr>
  <tr><td><code>GET /state</code></td><td>Current observation without stepping</td></tr>
  <tr><td><code>GET <a href="/docs">/docs</a></code></td><td>Interactive Swagger UI (full schema)</td></tr>
</table>

<h3>Try it (curl)</h3>
<pre>curl -X POST https://Viani-SimMart.hf.space/reset \\
  -H 'Content-Type: application/json' \\
  -d '{{"seed": 42}}'</pre>

<h3>Materials</h3>
<p class="links">
  <a href="{_HUB}/README.md" target="_top">README</a>
  <a href="{_HUB}/BLOG.md" target="_top">Mini-blog</a>
  <a href="{_HUB}/notebooks/hackathon_grpo_single_gpu.ipynb" target="_top">Training notebook</a>
  <a href="{_HUB}/assets/training_curve_4dept.png" target="_top">Training curve</a>
  <a href="/docs">API docs</a>
</p>
</body>
</html>
"""
|
|
|
|
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
def index() -> HTMLResponse:
    """Serve the static landing page at the site root.

    The route is registered with ``include_in_schema=False``, so it is left
    out of the generated API schema. It exists for the HF Space iframe and is
    not one of the OpenEnv API endpoints.

    Returns:
        An ``HTMLResponse`` whose body is the prebuilt ``_INDEX_HTML`` document.
    """
    landing_page = HTMLResponse(content=_INDEX_HTML)
    return landing_page
|
|
|
|
def main(host: str = "0.0.0.0", port: int = 7860):
    """Run the module-level ``app`` under uvicorn.

    Args:
        host: Interface address passed to uvicorn; defaults to ``"0.0.0.0"``.
        port: TCP port passed to uvicorn; defaults to ``7860``.
    """
    # uvicorn is imported here rather than at module level, so merely
    # importing this module (e.g. to obtain ``app``) does not need it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
|
|
|
|
# Script entry point: when this file is executed directly (not imported),
# start the server with main()'s default host and port.
if __name__ == "__main__":
    main()
|
|