exploring-solver committed on
Commit
62c5bbf
·
1 Parent(s): ceec48c

Submission tweaks

Browse files
Files changed (7) hide show
  1. .gitignore +3 -1
  2. inference.py +7 -2
  3. pyproject.toml +27 -0
  4. server/__init__.py +1 -0
  5. server/app.py +16 -0
  6. setup.md +69 -164
  7. uv.lock +8 -0
.gitignore CHANGED
@@ -7,4 +7,6 @@ venv/
7
  *.egg-info/
8
  dist/
9
  build/
10
- .pytest_cache/
 
 
 
7
  *.egg-info/
8
  dist/
9
  build/
10
+ .pytest_cache/
11
+ myenv/
12
+ res/
inference.py CHANGED
@@ -8,7 +8,8 @@ Environment variables:
8
  API_BASE_URL LLM endpoint (default: https://router.huggingface.co/v1)
9
  MODEL_NAME Model identifier (default: Qwen/Qwen2.5-72B-Instruct)
10
  HF_TOKEN API key
11
- API_BASE_URL_ENV SupportEnv server URL (default: http://localhost:7860)
 
12
  """
13
  import json
14
  import os
@@ -26,7 +27,11 @@ from openai import OpenAI
26
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
27
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
28
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY", "")
29
- ENV_BASE_URL = os.getenv("API_BASE_URL_ENV", "http://localhost:7860")
 
 
 
 
30
 
31
  TEMPERATURE = 0.3
32
  MAX_TOKENS = 1024
 
8
  API_BASE_URL LLM endpoint (default: https://router.huggingface.co/v1)
9
  MODEL_NAME Model identifier (default: Qwen/Qwen2.5-72B-Instruct)
10
  HF_TOKEN API key
11
+ OPENENV_BASE_URL SupportEnv server URL (preferred)
12
+ API_BASE_URL_ENV SupportEnv server URL (backward compatible alias)
13
  """
14
  import json
15
  import os
 
27
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
28
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
29
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY", "")
30
+ ENV_BASE_URL = (
31
+ os.getenv("OPENENV_BASE_URL")
32
+ or os.getenv("API_BASE_URL_ENV")
33
+ or "http://localhost:7860"
34
+ )
35
 
36
  TEMPERATURE = 0.3
37
  MAX_TOKENS = 1024
pyproject.toml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "supportenv"
7
+ version = "1.0.0"
8
+ description = "OpenEnv customer support ticket triage benchmark"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "fastapi>=0.111.0",
13
+ "uvicorn[standard]>=0.30.0",
14
+ "pydantic>=2.7.0",
15
+ "openai>=1.35.0",
16
+ "httpx>=0.27.0",
17
+ "python-multipart>=0.0.9",
18
+ "requests>=2.31.0",
19
+ "openenv-core>=0.2.0",
20
+ ]
21
+
22
+ [project.scripts]
23
+ server = "server.app:main"
24
+
25
+ [tool.setuptools]
26
+ py-modules = ["app", "data", "environment", "graders", "inference", "models"]
27
+ packages = ["server"]
server/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SupportEnv server package."""
server/app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Validator-friendly server entrypoint for SupportEnv."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+
6
+ import uvicorn
7
+
8
+
9
+ def main() -> None:
10
+ """Launch the FastAPI app on the Hugging Face expected host/port."""
11
+ port = int(os.environ.get("PORT", "7860"))
12
+ uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1)
13
+
14
+
15
+ if __name__ == "__main__":
16
+ main()
setup.md CHANGED
@@ -1,214 +1,119 @@
1
- # SETUP.md Local Development Guide
2
 
3
- ## Prerequisites
4
 
5
- - Python 3.10+ ([download](https://www.python.org/downloads/))
6
- - Git
7
- - Docker (optional, for containerised run)
8
- - An OpenAI API key (optional, only for the LLM baseline)
9
 
10
- ---
 
 
 
 
11
 
12
- ## 1. Clone the repository
13
 
14
- ```bash
15
- git clone https://github.com/Shivoo29/dummy_1.git
16
- cd dummy_1
17
- git checkout claude/openenv-ai-agent-environment-qJ9pB
18
- ```
19
 
20
- ---
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- ## 2. Create a virtual environment
23
 
24
- ```bash
25
- python -m venv .venv
26
 
27
- # macOS / Linux
28
- source .venv/bin/activate
29
 
30
- # Windows (PowerShell)
 
31
  .venv\Scripts\Activate.ps1
32
- ```
33
-
34
- ---
35
-
36
- ## 3. Install dependencies
37
-
38
- ```bash
39
  pip install -r requirements.txt
40
  ```
41
 
42
- ---
43
-
44
- ## 4. Run the server
45
 
46
  ```bash
47
- uvicorn app:app --host 0.0.0.0 --port 7860 --reload
 
 
48
  ```
49
 
50
- - API: http://localhost:7860
51
- - Interactive docs (Swagger UI): http://localhost:7860/docs
52
- - ReDoc: http://localhost:7860/redoc
53
 
54
- ---
55
 
56
- ## 5. Quick smoke test
57
 
58
  ```bash
59
- # Health check
60
- curl http://localhost:7860/health
61
-
62
- # List tasks
63
- curl http://localhost:7860/tasks
64
-
65
- # Start a task1 episode
66
- curl -X POST http://localhost:7860/reset \
67
- -H "Content-Type: application/json" \
68
- -d '{"task_id": "task1", "ticket_index": 0}'
69
-
70
- # The response contains an episode_id — use it below
71
- EPISODE_ID="<paste episode_id here>"
72
-
73
- # Submit a classification action
74
- curl -X POST http://localhost:7860/step \
75
- -H "Content-Type: application/json" \
76
- -d "{\"episode_id\": \"$EPISODE_ID\", \"action\": {\"action_type\": \"classify\", \"category\": \"billing\", \"priority\": \"high\"}}"
77
-
78
- # Submit to close the episode
79
- curl -X POST http://localhost:7860/step \
80
- -H "Content-Type: application/json" \
81
- -d "{\"episode_id\": \"$EPISODE_ID\", \"action\": {\"action_type\": \"submit\"}}"
82
-
83
- # Grade the episode
84
- curl -X POST http://localhost:7860/grader \
85
- -H "Content-Type: application/json" \
86
- -d "{\"episode_id\": \"$EPISODE_ID\"}"
87
  ```
88
 
89
- ---
90
-
91
- ## 6. Run the baseline
92
-
93
- ### Heuristic baseline (no API key required)
94
 
95
  ```bash
96
- # Single ticket (ticket_index 0)
97
- python baseline.py --mode heuristic
98
-
99
- # All 5 tickets per task, averaged
100
- python baseline.py --mode heuristic --all-tickets
101
- ```
102
-
103
- Expected output:
104
- ```
105
- task1: 0.8600 (scores: [1.0, 1.0, 1.0, 1.0, 0.3])
106
- task2: 0.5614 (scores: [0.8, 0.386, 0.45, 0.7, 0.471])
107
- task3: 0.9895 (scores: [1.0, 0.992, 0.961, 0.994, 1.0])
108
- OVERALL AVERAGE: 0.8036
109
  ```
110
 
111
- ### LLM baseline (requires OpenAI API key)
112
 
113
  ```bash
114
- export OPENAI_API_KEY="sk-..." # macOS/Linux
115
- # $env:OPENAI_API_KEY="sk-..." # Windows PowerShell
116
-
117
- python baseline.py --mode llm --model gpt-4o-mini
118
- python baseline.py --mode llm --model gpt-4o-mini --all-tickets
119
  ```
120
 
121
- ---
122
-
123
- ## 7. Run with Docker
124
 
125
  ```bash
126
- # Build
127
- docker build -t supportenv .
128
-
129
- # Run (no API key needed for heuristic mode)
130
- docker run -p 7860:7860 supportenv
131
-
132
- # Run with OpenAI key for LLM baseline
133
- docker run -p 7860:7860 -e OPENAI_API_KEY="sk-..." supportenv
134
  ```
135
 
136
- ---
137
-
138
- ## 8. Project layout
139
 
140
- ```
141
- dummy_1/
142
- ├── app.py FastAPI server — all HTTP endpoints
143
- ├── environment.py Episode lifecycle: reset / step / state / grade
144
- ├── graders.py Deterministic graders for all 3 tasks
145
- ├── data.py 15 pre-defined tickets + ground truth answers
146
- ├── models.py Pydantic typed models (Observation, Action, Reward…)
147
- ├── baseline.py Heuristic + LLM baseline inference scripts
148
- ├── openenv.yaml OpenEnv spec metadata
149
- ├── Dockerfile HF Spaces-compatible container (port 7860)
150
- ├── requirements.txt Python dependencies
151
- ├── README.md Full environment documentation
152
- └── SETUP.md This file
153
  ```
154
 
155
- ---
156
 
157
- ## 9. Key files to edit when extending
158
 
159
- | What you want to change | File to edit |
160
- |------------------------|-------------|
161
- | Add / modify tickets | `data.py` `TASK1/2/3_TICKETS` lists |
162
- | Change grader weights | `graders.py` `grade_task1/2/3()` |
163
- | Add a new task | `data.py` (add task meta) + `graders.py` + `app.py` (`_ACTION_SCHEMAS`) |
164
- | Change reward shaping | `environment.py` — `_step_reward_task*` functions and constants |
165
- | Add an endpoint | `app.py` |
166
- | Change typed models | `models.py` |
167
 
168
- ---
169
 
170
- ## 10. Deploy to Hugging Face Spaces
171
 
172
- 1. Create a new Space at https://huggingface.co/new-space
173
- - SDK: **Docker**
174
- - Visibility: Public
175
- 2. Add the HF Space as a remote:
176
- ```bash
177
- git remote add hf https://huggingface.co/spaces/<your-username>/<space-name>
178
- ```
179
- 3. Push:
180
- ```bash
181
- git push hf claude/openenv-ai-agent-environment-qJ9pB:main
182
- ```
183
- 4. The Space auto-builds from the `Dockerfile` and exposes port 7860.
184
 
185
- ---
186
 
187
- ## 11. Environment variables
188
 
189
- | Variable | Required | Description |
190
- |----------|----------|-------------|
191
- | `OPENAI_API_KEY` | Only for LLM baseline | Your OpenAI API key |
192
- | `PORT` | No (default 7860) | Override server port |
193
 
194
- ---
195
 
196
- ## 12. Running tests
197
 
198
- ```bash
199
- python -c "
200
- import environment as env
201
- from models import Action
202
-
203
- # Verify all 3 tasks reset and grade correctly
204
- for task_id in ['task1', 'task2', 'task3']:
205
- for i in range(5):
206
- obs = env.reset(task_id, i)
207
- env.step(obs.episode_id, Action(action_type='submit'))
208
- gr = env.grade(obs.episode_id)
209
- assert 0.0 <= gr.score <= 1.0, f'Score out of range: {gr.score}'
210
- print(f'{task_id} ticket[{i}]: score={gr.score:.4f} OK')
211
-
212
- print('All tests passed.')
213
- "
214
- ```
 
1
+ # setup.md - SupportEnv Validator-Focused Runbook
2
 
3
+ ## 1. What judges/validator execute
4
 
5
+ Most checks follow this flow:
 
 
 
6
 
7
+ 1. `POST /reset` on the deployed Space
8
+ 2. `docker build` from repo root
9
+ 3. `openenv validate`
10
+ 4. endpoint contract checks for `/health`, `/reset`, `/step`, `/state`, `/grader`
11
+ 5. `python inference.py` and stdout format check for `[START]`, `[STEP]`, `[END]`
12
 
 
13
 
14
+ ## 2. File-by-file usage (root)
 
 
 
 
15
 
16
+ - `app.py`: FastAPI API surface (`/reset`, `/step`, `/state`, `/tasks`, `/grader`, `/health`)
17
+ - `environment.py`: episode lifecycle and reward accumulation (`reset`, `step`, `get_state`, `grade`)
18
+ - `graders.py`: deterministic terminal scoring per task with score clamped to `[0.0, 1.0]`
19
+ - `data.py`: task metadata and ticket datasets with ground truth labels/entities/steps
20
+ - `models.py`: typed Pydantic models used by API and internal state
21
+ - `inference.py`: baseline runner; calls the API, logs strict `[START]/[STEP]/[END]`
22
+ - `openenv.yaml`: OpenEnv metadata and interface declaration used by validator
23
+ - `Dockerfile`: image build/runtime contract for HF Docker Spaces (serves on `7860`)
24
+ - `requirements.txt`: runtime dependencies
25
+ - `pyproject.toml`: packaging metadata + script entrypoint expected by validator tooling
26
+ - `uv.lock`: lockfile required by OpenEnv multi-mode validation path
27
+ - `server/app.py`: validator-friendly script entrypoint (`server = server.app:main`)
28
 
 
29
 
30
+ ## 3. Local setup
 
31
 
32
+ ### Windows PowerShell
 
33
 
34
+ ```powershell
35
+ python -m venv .venv
36
  .venv\Scripts\Activate.ps1
 
 
 
 
 
 
 
37
  pip install -r requirements.txt
38
  ```
39
 
40
+ ### macOS/Linux
 
 
41
 
42
  ```bash
43
+ python -m venv .venv
44
+ source .venv/bin/activate
45
+ pip install -r requirements.txt
46
  ```
47
 
 
 
 
48
 
49
+ ## 4. Validation checklist (exact order)
50
 
51
+ 1. OpenEnv validator
52
 
53
  ```bash
54
+ .venv/Scripts/openenv.exe validate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ```
56
 
57
+ 2. Docker build
 
 
 
 
58
 
59
  ```bash
60
+ docker build -t supportenv .
 
 
 
 
 
 
 
 
 
 
 
 
61
  ```
62
 
63
+ 3. Run server locally
64
 
65
  ```bash
66
+ uvicorn app:app --host 0.0.0.0 --port 7860
 
 
 
 
67
  ```
68
 
69
+ 4. API checks
 
 
70
 
71
  ```bash
72
+ curl http://127.0.0.1:7860/health
73
+ curl -X POST http://127.0.0.1:7860/reset -H "Content-Type: application/json" -d '{"task_id":"task1","ticket_index":0}'
74
+ curl -X POST http://127.0.0.1:7860/step -H "Content-Type: application/json" -d '{"episode_id":"<id>","action":{"action_type":"classify","category":"billing","priority":"high"}}'
75
+ curl -X POST http://127.0.0.1:7860/state?episode_id=<id>
76
+ curl -X POST http://127.0.0.1:7860/grader -H "Content-Type: application/json" -d '{"episode_id":"<id>"}'
 
 
 
77
  ```
78
 
79
+ 5. Baseline inference
 
 
80
 
81
+ ```bash
82
+ python inference.py
 
 
 
 
 
 
 
 
 
 
 
83
  ```
84
 
 
85
 
86
+ ## 5. Docker and Spaces runtime model
87
 
88
+ - Build stage installs from `requirements.txt`.
89
+ - Runtime command runs Uvicorn: `app:app` on `0.0.0.0:7860`.
90
+ - HF Space should set `sdk: docker` and `app_port: 7860` in `README.md` frontmatter.
91
+ - Healthcheck points at `/health` to indicate container liveness.
92
+ - If the Docker daemon is not running locally, `docker build`/`docker run` will fail even if the repo is correct.
 
 
 
93
 
 
94
 
95
+ ## 6. Inference variables
96
 
97
+ - Required for LLM call path:
98
+ - `API_BASE_URL`
99
+ - `MODEL_NAME`
100
+ - `HF_TOKEN`
101
+ - Environment endpoint:
102
+ - `OPENENV_BASE_URL` (preferred)
103
+ - `API_BASE_URL_ENV` (legacy alias)
 
 
 
 
 
104
 
 
105
 
106
+ ## 7. Example scorer sanity checks
107
 
108
+ - Task 1: submit `classify` then `submit`, verify non-binary reward and final score in `[0, 1]`
109
+ - Task 2: include deterministic entity/action coverage keys from ticket text
110
+ - Task 3: include professional response plus ordered resolution steps
 
111
 
 
112
 
113
+ ## 8. Common failure causes
114
 
115
+ - Missing `pyproject.toml` or `uv.lock`
116
+ - Missing script entrypoint (`server = server.app:main`)
117
+ - App not serving on `0.0.0.0:7860`
118
+ - Duplicate HF variable/secret names in Space settings
119
+ - Invalid or missing `HF_TOKEN` for real LLM inference
 
 
 
 
 
 
 
 
 
 
 
 
uv.lock ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ version = 1
2
+ revision = 1
3
+ requires-python = ">=3.10"
4
+
5
+ [[package]]
6
+ name = "supportenv"
7
+ version = "1.0.0"
8
+ source = { editable = "." }