Priyansh Saxena committed on
Commit
7c54da3
·
1 Parent(s): dbab76b

fix: pyproject.toml issue

Browse files
Files changed (3) hide show
  1. inference.py +69 -72
  2. openenv.yaml +1 -1
  3. pyproject.toml +23 -0
inference.py CHANGED
@@ -7,44 +7,36 @@ from typing import List
7
  from openai import OpenAI
8
  import httpx
9
 
10
- API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
11
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
12
- API_KEY = os.environ.get("OPENAI_API_KEY", "dummy")
13
  ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
14
- TASK_NAME = os.environ.get("TASK_NAME", "easy")
15
  MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
16
  SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
17
  MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
 
19
 
20
  def log_start(task, env, model):
21
- print(json.dumps({
22
- "type": "START",
23
- "task": task,
24
- "env": env,
25
- "model": model,
26
- }), flush=True)
27
 
28
 
29
  def log_step(step, action, reward, done, error):
30
- print(json.dumps({
31
- "type": "STEP",
32
- "step": step,
33
- "action": action,
34
- "reward": float(reward),
35
- "done": bool(done),
36
- "error": error,
37
- }), flush=True)
38
-
39
-
40
- def log_end(success, steps, score, rewards):
41
- print(json.dumps({
42
- "type": "END",
43
- "success": bool(success),
44
- "steps": steps,
45
- "score": float(score),
46
- "rewards": [float(r) for r in rewards],
47
- }), flush=True)
48
 
49
 
50
  def get_model_message(client: OpenAI, observation: dict, history: List[str]) -> str:
@@ -75,51 +67,56 @@ History: {history}
75
 
76
  async def main():
77
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
78
- rewards = []
79
- history = []
80
- steps_taken = 0
81
- score = 0.0
82
- success = False
83
-
84
- log_start(task=TASK_NAME, env="pytorch-debug-env", model=MODEL_NAME)
85
-
86
- async with httpx.AsyncClient(timeout=60.0) as session:
87
- reset_resp = await session.post(f"{ENV_URL}/reset", params={"task_id": TASK_NAME})
88
- reset_resp.raise_for_status()
89
- result = reset_resp.json()
90
- session_id = result.get("session_id")
91
- observation = result["observation"]
92
-
93
- for step in range(1, MAX_STEPS + 1):
94
- if result.get("done"):
95
- break
96
-
97
- action_text = get_model_message(client, observation, history)
98
- try:
99
- action_json = json.loads(action_text)
100
- step_resp = await session.post(f"{ENV_URL}/step", params={"session_id": session_id}, json=action_json)
101
- step_resp.raise_for_status()
102
- result = step_resp.json()
103
- reward = result.get("reward", 0.0)
104
- done = result.get("done", False)
105
- error = None
106
- observation = result["observation"]
107
- except Exception as exc:
108
- reward = 0.0
109
- done = True
110
- error = str(exc)
111
-
112
- rewards.append(reward)
113
- steps_taken = step
114
- log_step(step=step, action=action_text, reward=reward, done=done, error=error)
115
- history.append(f"step={step} reward={reward:.3f}")
116
-
117
- if done:
118
- break
119
-
120
- score = min(max(rewards[-1] if rewards else 0.0, 0.0), 1.0)
121
- success = score >= SUCCESS_SCORE_THRESHOLD
122
- log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
 
 
 
 
123
 
124
 
125
  if __name__ == "__main__":
 
7
  from openai import OpenAI
8
  import httpx
9
 
10
# OpenAI-compatible chat endpoint; defaults to the Hugging Face inference router.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
# First credential found wins: HF_TOKEN, then API_KEY, then OPENAI_API_KEY,
# falling back to "dummy" for endpoints that do not check auth.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY") or os.environ.get("OPENAI_API_KEY", "dummy")
# Base URL of the environment server exposing the /reset and /step endpoints.
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
# Comma-separated task ids; main() runs each one sequentially.
TASKS = os.environ.get("TASKS", "easy,medium,hard")
# Hard cap on environment steps per task episode.
MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
# Final-step score at or above this threshold counts the episode as a success.
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
# NOTE(review): not referenced anywhere in the visible code — confirm whether
# this is still needed or is dead configuration.
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
 
19
 
20
def log_start(task, env, model):
    """Emit a single ``[START]`` log line announcing the run configuration."""
    message = f"[START] task={task} env={env} model={model}"
    print(message, flush=True)
 
 
 
 
 
22
 
23
 
24
def log_step(step, action, reward, done, error):
    """Emit a single ``[STEP]`` log line describing one environment interaction.

    ``error`` renders as ``null`` when ``None``; ``done`` renders as
    lowercase ``true``/``false``; ``reward`` is formatted to two decimals.
    """
    error_text = str(error) if error is not None else "null"
    flag = "false"
    if done:
        flag = "true"
    line = f"[STEP] step={step} action={action} reward={reward:.2f} done={flag} error={error_text}"
    print(line, flush=True)
31
+
32
+
33
def log_end(success, steps, rewards):
    """Emit a single ``[END]`` log line summarizing a finished episode.

    ``success`` renders as lowercase ``true``/``false``; ``rewards`` is a
    comma-joined list of two-decimal values (empty string for no rewards).
    """
    outcome = "true" if success else "false"
    formatted = [f"{value:.2f}" for value in rewards]
    joined = ",".join(formatted)
    print(f"[END] success={outcome} steps={steps} rewards={joined}", flush=True)
 
 
 
40
 
41
 
42
  def get_model_message(client: OpenAI, observation: dict, history: List[str]) -> str:
 
67
 
68
async def main():
    """Run the agent loop against the debug environment for each configured task.

    For every task id in TASKS: POST /reset to start an episode, then loop up
    to MAX_STEPS times — ask the model for an action, forward it to /step, and
    record the reward. Emits [START]/[STEP]/[END] lines via the log_* helpers.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    # TASKS is comma-separated (e.g. "easy,medium,hard"); blank entries dropped.
    tasks = [task.strip() for task in TASKS.split(",") if task.strip()]

    for task in tasks:
        rewards = []
        history = []
        steps_taken = 0

        log_start(task=task, env="pytorch-debug-env", model=MODEL_NAME)

        async with httpx.AsyncClient(timeout=60.0) as session:
            # Start a fresh episode; the server returns a session_id that is
            # passed back on every /step call.
            reset_resp = await session.post(f"{ENV_URL}/reset", params={"task_id": task})
            reset_resp.raise_for_status()
            result = reset_resp.json()
            session_id = result.get("session_id")
            observation = result["observation"]

            for step in range(1, MAX_STEPS + 1):
                # The reset (or the previous step) may already have ended the
                # episode; stop before requesting another action.
                if result.get("done"):
                    break

                # NOTE(review): get_model_message is a synchronous OpenAI call
                # inside an async loop — it blocks the event loop while waiting.
                action_text = get_model_message(client, observation, history)
                try:
                    # The model is expected to reply with a JSON-encoded action.
                    action_json = json.loads(action_text)
                    step_resp = await session.post(
                        f"{ENV_URL}/step",
                        params={"session_id": session_id},
                        json=action_json,
                    )
                    step_resp.raise_for_status()
                    result = step_resp.json()
                    reward = result.get("reward", 0.0)
                    done = result.get("done", False)
                    error = None
                    observation = result["observation"]
                except Exception as exc:
                    # Any failure (unparsable model output, HTTP error, missing
                    # "observation" key) ends the episode with zero reward for
                    # this step; the error text is surfaced in the step log.
                    reward = 0.0
                    done = True
                    error = str(exc)

                rewards.append(reward)
                steps_taken = step
                log_step(step=step, action=action_text, reward=reward, done=done, error=error)
                history.append(f"step={step} reward={reward:.3f}")

                if done:
                    break

            # Score is the FINAL step's reward clamped to [0, 1]; earlier
            # rewards are reported but do not contribute to success.
            score = min(max(rewards[-1] if rewards else 0.0, 0.0), 1.0)
            success = score >= SUCCESS_SCORE_THRESHOLD
            log_end(success=success, steps=steps_taken, rewards=rewards)
120
 
121
 
122
  if __name__ == "__main__":
openenv.yaml CHANGED
@@ -15,7 +15,7 @@ observation:
15
  class_name: PyTorchDebugObservation
16
  module: src.pytorch_debug_env.models
17
 
18
- default_image: pytorch-debug-env:latest
19
  spec_version: 1
20
 
21
  tags:
 
15
  class_name: PyTorchDebugObservation
16
  module: src.pytorch_debug_env.models
17
 
18
+ default_image: https://archcoder-pytorch-debug-env.hf.space
19
  spec_version: 1
20
 
21
  tags:
pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pytorch-debug-env"
7
+ version = "1.0.0"
8
+ requires-python = ">=3.11"
9
+ dependencies = [
10
+ "fastapi==0.115.0",
11
+ "uvicorn[standard]==0.30.6",
12
+ "pydantic==2.9.2",
13
+ "numpy==2.1.1",
14
+ "openai==1.51.0",
15
+ "httpx==0.27.2",
16
+ "openenv>=0.1.0",
17
+ ]
18
+
19
+ [tool.setuptools.packages.find]
20
+ where = ["src"]
21
+
22
+ [tool.pytest.ini_options]
23
+ asyncio_mode = "auto"