Commit 62c5bbf (parent: ceec48c)
Submission tweaks
Changed files:
- .gitignore (+3 -1)
- inference.py (+7 -2)
- pyproject.toml (+27 -0)
- server/__init__.py (+1 -0)
- server/app.py (+16 -0)
- setup.md (+69 -164)
- uv.lock (+8 -0)

.gitignore (CHANGED)

```diff
@@ -7,4 +7,6 @@ venv/
 *.egg-info/
 dist/
 build/
-.pytest_cache/
+.pytest_cache/
+myenv/
+res/
```

inference.py (CHANGED)

```diff
@@ -8,7 +8,8 @@ Environment variables:
 API_BASE_URL      LLM endpoint (default: https://router.huggingface.co/v1)
 MODEL_NAME        Model identifier (default: Qwen/Qwen2.5-72B-Instruct)
 HF_TOKEN          API key
-
+OPENENV_BASE_URL  SupportEnv server URL (preferred)
+API_BASE_URL_ENV  SupportEnv server URL (backward compatible alias)
 """
 import json
 import os
@@ -26,7 +27,11 @@ from openai import OpenAI
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY", "")
-ENV_BASE_URL =
+ENV_BASE_URL = (
+    os.getenv("OPENENV_BASE_URL")
+    or os.getenv("API_BASE_URL_ENV")
+    or "http://localhost:7860"
+)
 
 TEMPERATURE = 0.3
 MAX_TOKENS = 1024
```

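Not part of the commit, but to make the new variables concrete: the sketch below resolves the environment endpoint with the same precedence as the `ENV_BASE_URL` block above, verifies the server's `/health` route (documented later in setup.md), and then runs `inference.py` with the resolved value exported. The `requests` dependency is taken from `pyproject.toml`; everything else is an assumption about local usage.

```python
# Hypothetical pre-flight check before running `python inference.py`.
# Assumes a SupportEnv server is (or should be) listening on OPENENV_BASE_URL.
import os
import subprocess
import sys

import requests  # listed in pyproject.toml

# Same precedence as the new ENV_BASE_URL block in inference.py.
base_url = (
    os.getenv("OPENENV_BASE_URL")
    or os.getenv("API_BASE_URL_ENV")
    or "http://localhost:7860"
)

try:
    resp = requests.get(f"{base_url}/health", timeout=5)
    resp.raise_for_status()
except requests.RequestException as exc:
    sys.exit(f"SupportEnv server not reachable at {base_url}: {exc}")

# Run the baseline with the resolved endpoint exported for the child process.
subprocess.run(
    [sys.executable, "inference.py"],
    env={**os.environ, "OPENENV_BASE_URL": base_url},
    check=True,
)
```
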
pyproject.toml (ADDED)

```toml
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "supportenv"
version = "1.0.0"
description = "OpenEnv customer support ticket triage benchmark"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "fastapi>=0.111.0",
    "uvicorn[standard]>=0.30.0",
    "pydantic>=2.7.0",
    "openai>=1.35.0",
    "httpx>=0.27.0",
    "python-multipart>=0.0.9",
    "requests>=2.31.0",
    "openenv-core>=0.2.0",
]

[project.scripts]
server = "server.app:main"

[tool.setuptools]
py-modules = ["app", "data", "environment", "graders", "inference", "models"]
packages = ["server"]
```

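Not part of the commit: a small sketch of how one might confirm that the `server` console script declared under `[project.scripts]` is registered after an install such as `pip install -e .`. It relies only on the standard-library `importlib.metadata`; the expected target string `server.app:main` comes from the table above.

```python
# Hypothetical check that the "server" entry point from pyproject.toml exists
# in the current environment (requires the package to be installed first).
from importlib.metadata import entry_points

scripts = entry_points(group="console_scripts")  # Python 3.10+ keyword form
matches = [ep for ep in scripts if ep.name == "server"]

if not matches:
    print("No 'server' console script found; run `pip install -e .` first.")
else:
    ep = matches[0]
    print(f"server -> {ep.value}")  # expected: server.app:main
    main = ep.load()                # imports server.app and returns its main()
    # Calling main() would start Uvicorn on 0.0.0.0:7860 and block, so stop here.
```
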
server/__init__.py (ADDED)

```python
"""SupportEnv server package."""
```

server/app.py (ADDED)

```python
"""Validator-friendly server entrypoint for SupportEnv."""
from __future__ import annotations

import os

import uvicorn


def main() -> None:
    """Launch the FastAPI app on the Hugging Face expected host/port."""
    port = int(os.environ.get("PORT", "7860"))
    uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1)


if __name__ == "__main__":
    main()
```

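A quick usage sketch, not part of the commit, showing that `main()` honours a `PORT` override. Assumptions: it is run from the repository root so that both `server` and `app` are importable, and port 8080 is free. Shell equivalents would be `PORT=8080 python -m server.app` or, once the package is installed, `PORT=8080 server`.

```python
# Hypothetical launcher demonstrating the PORT override in server/app.py.
import os

from server.app import main

os.environ["PORT"] = "8080"  # main() reads PORT and falls back to 7860

if __name__ == "__main__":
    main()  # blocks: uvicorn serves app:app on 0.0.0.0:8080
```
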
setup.md (CHANGED; rewritten from a 214-line general setup guide into a 119-line validator-focused runbook)

Removed (old version, summarized):
- Prerequisites: Git, Docker (optional, for a containerised run), an OpenAI API key (optional, only for the LLM baseline).
- Clone the repository: `git clone https://github.com/Shivoo29/dummy_1.git`, `cd dummy_1`, `git checkout claude/openenv-ai-agent-environment-qJ9pB`.
- Create a virtual environment: `python -m venv .venv`, then `source .venv/bin/activate` (or `.venv\Scripts\Activate.ps1` on Windows).
- Install dependencies: `pip install -r requirements.txt`.
- Run the server: API at http://localhost:7860, Swagger UI at /docs, ReDoc at /redoc.
- curl walkthrough: `/health`, `/tasks`, `POST /reset` to start a task1 episode, `POST /step` with a classify action and then a submit action, `POST /grader`, all using the returned `episode_id`.
- Run the baseline: heuristic mode (`python baseline.py --mode heuristic`, optionally `--all-tickets`) with expected output task1 0.8600, task2 0.5614, task3 0.9895, overall average 0.8036; LLM mode (`python baseline.py --mode llm --model gpt-4o-mini`, optionally `--all-tickets`) with `OPENAI_API_KEY` set.
- Run with Docker: `docker run -p 7860:7860 -e OPENAI_API_KEY="sk-..." supportenv`.
- Project layout: app.py (FastAPI server, all HTTP endpoints), environment.py (episode lifecycle: reset / step / state / grade), graders.py (deterministic graders for all 3 tasks), data.py (15 pre-defined tickets + ground-truth answers), models.py (Pydantic typed models), baseline.py (heuristic + LLM baseline scripts), openenv.yaml (OpenEnv spec metadata), Dockerfile (HF Spaces-compatible container, port 7860), requirements.txt, README.md, SETUP.md.
- Customisation pointers: reward shaping in `environment.py` (`_step_reward_task*` functions and constants), new endpoints in `app.py`, typed models in `models.py`.
- Deployment: `git push hf claude/openenv-ai-agent-environment-qJ9pB:main`; the Space auto-builds from the `Dockerfile` and exposes port 7860.
- Environment variables: `PORT` (optional, default 7860) to override the server port.
- Smoke test: a loop over task1, task2, task3 and all 5 tickets calling `env.reset`, `env.step(..., Action(action_type='submit'))`, and `env.grade`, asserting `0.0 <= score <= 1.0`.

Added (new version, shown in full below):

# setup.md - SupportEnv Validator-Focused Runbook

## 1. What judges/validators execute

Most checks align to this flow (a Python sketch of the same round trip, not part of the file, follows the list):

1. `POST /reset` on the deployed Space
2. `docker build` from the repo root
3. `openenv validate`
4. Endpoint contract checks for `/health`, `/reset`, `/step`, `/state`, `/grader`
5. `python inference.py` and a stdout format check for `[START]`, `[STEP]`, `[END]`
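The sketch below is not part of the committed file. It assumes a locally running server on port 7860, uses `httpx` from the dependency list, and borrows the request payloads and the `episode_id` field from the API examples in section 4.

```python
# Hypothetical end-to-end probe of the SupportEnv HTTP contract.
import httpx

BASE = "http://127.0.0.1:7860"

with httpx.Client(base_url=BASE, timeout=10.0) as client:
    assert client.get("/health").status_code == 200

    # Start an episode for task1, ticket 0.
    obs = client.post("/reset", json={"task_id": "task1", "ticket_index": 0}).json()
    episode_id = obs["episode_id"]  # field name taken from the API examples

    # Classify, then submit to end the episode.
    client.post("/step", json={
        "episode_id": episode_id,
        "action": {"action_type": "classify", "category": "billing", "priority": "high"},
    })
    client.post("/step", json={"episode_id": episode_id, "action": {"action_type": "submit"}})

    # Grade and print the final score (expected to lie in [0, 1]).
    grade = client.post("/grader", json={"episode_id": episode_id}).json()
    print("score:", grade.get("score"))
```
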
## 2. File-by-file usage (root)

- `app.py`: FastAPI API surface (`/reset`, `/step`, `/state`, `/tasks`, `/grader`, `/health`)
- `environment.py`: episode lifecycle and reward accumulation (`reset`, `step`, `get_state`, `grade`)
- `graders.py`: deterministic terminal scoring per task with score clamped to `[0.0, 1.0]`
- `data.py`: task metadata and ticket datasets with ground truth labels/entities/steps
- `models.py`: typed Pydantic models used by API and internal state
- `inference.py`: baseline runner; calls the API, logs strict `[START]/[STEP]/[END]`
- `openenv.yaml`: OpenEnv metadata and interface declaration used by validator
- `Dockerfile`: image build/runtime contract for HF Docker Spaces (serves on `7860`)
- `requirements.txt`: runtime dependencies
- `pyproject.toml`: packaging metadata + script entrypoint expected by validator tooling
- `uv.lock`: lockfile required by OpenEnv multi-mode validation path
- `server/app.py`: validator-friendly script entrypoint (`server = server.app:main`)
## 3. Local setup

### Windows PowerShell

```powershell
python -m venv .venv
.venv\Scripts\Activate.ps1
pip install -r requirements.txt
```

### macOS/Linux

```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```
## 4. Validation checklist (exact order)

1. OpenEnv validator

```bash
.venv/Scripts/openenv.exe validate
```

2. Docker build

```bash
docker build -t supportenv .
```

3. Run server locally

```bash
uvicorn app:app --host 0.0.0.0 --port 7860
```

4. API checks

```bash
curl http://127.0.0.1:7860/health
curl -X POST http://127.0.0.1:7860/reset -H "Content-Type: application/json" -d '{"task_id":"task1","ticket_index":0}'
curl -X POST http://127.0.0.1:7860/step -H "Content-Type: application/json" -d '{"episode_id":"<id>","action":{"action_type":"classify","category":"billing","priority":"high"}}'
curl -X POST http://127.0.0.1:7860/state?episode_id=<id>
curl -X POST http://127.0.0.1:7860/grader -H "Content-Type: application/json" -d '{"episode_id":"<id>"}'
```

5. Baseline inference

```bash
python inference.py
```
## 5. Docker and Spaces runtime model

- The build stage installs from `requirements.txt`.
- The runtime command runs Uvicorn: `app:app` on `0.0.0.0:7860`.
- The HF Space should set `sdk: docker` and `app_port: 7860` in the `README.md` frontmatter.
- The healthcheck points at `/health` to indicate container liveness (a polling sketch, not part of this file, follows the list).
- If the Docker daemon is not running locally, `docker build`/`docker run` will fail even if the repo is correct.
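As a small illustration, not part of the committed file: the sketch below polls `/health` until the container reports liveness, which is roughly what the Space healthcheck relies on. Assumptions: the image was built as `supportenv`, started with `docker run -p 7860:7860 supportenv`, and `requests` is available on the host.

```python
# Hypothetical liveness probe for a locally running SupportEnv container.
import time

import requests

URL = "http://127.0.0.1:7860/health"


def wait_for_health(timeout: float = 60.0, interval: float = 2.0) -> bool:
    """Poll /health until it returns 200 or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if requests.get(URL, timeout=5).status_code == 200:
                return True
        except requests.ConnectionError:
            pass  # container still starting
        time.sleep(interval)
    return False


if __name__ == "__main__":
    print("healthy" if wait_for_health() else "not healthy within timeout")
```
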
## 6. Inference variables

- Required for LLM call path:
  - `API_BASE_URL`
  - `MODEL_NAME`
  - `HF_TOKEN`
- Environment endpoint:
  - `OPENENV_BASE_URL` (preferred)
  - `API_BASE_URL_ENV` (backward-compatible alias)
## 7. Example scorer sanity checks

A score-range smoke test covering all three tasks (not part of this file) is sketched after this list.

- Task 1: submit `classify` then `submit`; verify a non-binary reward and a final score in `[0, 1]`
- Task 2: include deterministic entity/action coverage keys from the ticket text
- Task 3: include a professional response plus ordered resolution steps
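The sketch below is adapted from the smoke-test loop in the previous version of this file: the `env.reset` / `env.step` / `env.grade` calls and `Action(action_type='submit')` are taken from that old snippet, while the `SupportEnvironment` import name is an assumption to check against `environment.py`.

```python
# Hypothetical in-process smoke test over all tasks and tickets.
from environment import SupportEnvironment  # class name assumed
from models import Action

env = SupportEnvironment()

# Verify all 3 tasks reset and grade correctly, with scores in [0, 1].
for task_id in ["task1", "task2", "task3"]:
    for i in range(5):
        obs = env.reset(task_id, i)
        env.step(obs.episode_id, Action(action_type="submit"))
        gr = env.grade(obs.episode_id)
        assert 0.0 <= gr.score <= 1.0, f"Score out of range: {gr.score}"
        print(f"{task_id} ticket[{i}]: score={gr.score:.4f} OK")

print("All tests passed.")
```
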
## 8. Common failure causes

- Missing `pyproject.toml` or `uv.lock`
- Missing script entrypoint (`server = server.app:main`)
- App not serving on `0.0.0.0:7860`
- Duplicate HF variable/secret names in Space settings
- Invalid or missing `HF_TOKEN` for real LLM inference

uv.lock (ADDED)

```toml
version = 1
revision = 1
requires-python = ">=3.10"

[[package]]
name = "supportenv"
version = "1.0.0"
source = { editable = "." }
```