Priyansh Saxena committed on
Commit ·
7c54da3
1
Parent(s): dbab76b
fix: pyproject.toml issue
Browse files- inference.py +69 -72
- openenv.yaml +1 -1
- pyproject.toml +23 -0
inference.py
CHANGED
|
@@ -7,44 +7,36 @@ from typing import List
|
|
| 7 |
from openai import OpenAI
|
| 8 |
import httpx
|
| 9 |
|
| 10 |
-
API_BASE_URL = os.environ.get("API_BASE_URL", "https://
|
| 11 |
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
|
| 12 |
-
API_KEY = os.environ.get("OPENAI_API_KEY", "dummy")
|
| 13 |
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
|
| 14 |
-
|
| 15 |
MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
|
| 16 |
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
|
| 17 |
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
|
| 19 |
|
| 20 |
def log_start(task, env, model):
|
| 21 |
-
print(
|
| 22 |
-
"type": "START",
|
| 23 |
-
"task": task,
|
| 24 |
-
"env": env,
|
| 25 |
-
"model": model,
|
| 26 |
-
}), flush=True)
|
| 27 |
|
| 28 |
|
| 29 |
def log_step(step, action, reward, done, error):
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
"action
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
print(
|
| 42 |
-
"
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
"score": float(score),
|
| 46 |
-
"rewards": [float(r) for r in rewards],
|
| 47 |
-
}), flush=True)
|
| 48 |
|
| 49 |
|
| 50 |
def get_model_message(client: OpenAI, observation: dict, history: List[str]) -> str:
|
|
@@ -75,51 +67,56 @@ History: {history}
|
|
| 75 |
|
| 76 |
async def main():
|
| 77 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
if __name__ == "__main__":
|
|
|
|
| 7 |
from openai import OpenAI
|
| 8 |
import httpx
|
| 9 |
|
| 10 |
+
# Runtime configuration. Every value can be overridden through the environment
# variable of the same name; the second argument is the fallback.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
# Credential lookup order: HF_TOKEN, then API_KEY, then OPENAI_API_KEY.
# Because of the `or` chain an empty value falls through to the next variable;
# "dummy" is used when none of them is set.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY") or os.environ.get("OPENAI_API_KEY", "dummy")
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
# Comma-separated list of task ids; split and stripped by main().
TASKS = os.environ.get("TASKS", "easy,medium,hard")
MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
|
| 19 |
|
| 20 |
def log_start(task, env, model):
    """Print the ``[START]`` record that opens one task run.

    Args:
        task: Identifier of the task being started.
        env: Name of the environment the task runs in.
        model: Name of the model that will produce the actions.

    The record is written to stdout as a single line and flushed immediately.
    """
    print(f"[START] task={task} env={env} model={model}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def log_step(step, action, reward, done, error):
    """Print one ``[STEP]`` record describing a single environment step.

    Args:
        step: Step number within the current task.
        action: The action that was sent (any object; rendered with ``str``).
        reward: Reward for the step. ``None`` is reported as ``0.00``.
        done: Truthy when the episode finished on this step.
        error: Error description, or ``None`` when the step succeeded
            (printed as ``null``).

    The record is written to stdout as a single line and flushed immediately.
    """
    # Line breaks inside the action or the error text would split the record
    # across several lines, so they are replaced with spaces.
    action_str = str(action).replace("\r", " ").replace("\n", " ")
    err = "null" if error is None else str(error).replace("\r", " ").replace("\n", " ")
    done_str = "true" if done else "false"
    # A missing reward must not abort logging with a format error.
    reward_value = 0.0 if reward is None else float(reward)
    print(
        f"[STEP] step={step} action={action_str} reward={reward_value:.2f} done={done_str} error={err}",
        flush=True,
    )
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def log_end(success, steps, rewards):
    """Print the ``[END]`` record that closes one task run.

    Args:
        success: Truthy when the run counts as successful.
        steps: Number of steps that were taken.
        rewards: Iterable of per-step rewards; each is printed with two
            decimals, comma-separated. A ``None`` entry is printed as ``0.00``.

    The record is written to stdout as a single line and flushed immediately.
    """
    success_str = "true" if success else "false"
    # A None reward must not abort the final record with a format error.
    rewards_str = ",".join(f"{0.0 if r is None else float(r):.2f}" for r in rewards)
    print(
        f"[END] success={success_str} steps={steps} rewards={rewards_str}",
        flush=True,
    )
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def get_model_message(client: OpenAI, observation: dict, history: List[str]) -> str:
|
|
|
|
| 67 |
|
| 68 |
async def main():
    """Run every configured task against the environment server and log the result.

    For each task id in ``TASKS`` the function:

    1. prints a ``[START]`` record,
    2. POSTs to ``{ENV_URL}/reset`` with the task id and reads ``session_id``
       and ``observation`` from the JSON reply,
    3. for at most ``MAX_STEPS`` steps asks the model for an action via
       ``get_model_message``, parses it as JSON, POSTs it to
       ``{ENV_URL}/step`` and prints a ``[STEP]`` record,
    4. prints an ``[END]`` record. The score is the last step reward clamped
       to ``[0, 1]``; the run is successful when it reaches
       ``SUCCESS_SCORE_THRESHOLD``.

    Any exception raised while parsing or sending an action ends the episode
    with reward 0.0 and the exception text as the step error. Exceptions from
    the reset request or from ``get_model_message`` propagate to the caller.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    tasks = [task.strip() for task in TASKS.split(",") if task.strip()]

    for task in tasks:
        rewards = []
        history = []
        steps_taken = 0

        log_start(task=task, env="pytorch-debug-env", model=MODEL_NAME)

        try:
            async with httpx.AsyncClient(timeout=60.0) as session:
                reset_resp = await session.post(f"{ENV_URL}/reset", params={"task_id": task})
                reset_resp.raise_for_status()
                result = reset_resp.json()
                session_id = result.get("session_id")
                observation = result["observation"]

                for step in range(1, MAX_STEPS + 1):
                    if result.get("done"):
                        break

                    action_text = get_model_message(client, observation, history)
                    try:
                        action_json = json.loads(action_text)
                        step_resp = await session.post(
                            f"{ENV_URL}/step",
                            params={"session_id": session_id},
                            json=action_json,
                        )
                        step_resp.raise_for_status()
                        result = step_resp.json()
                        # A JSON null reward would otherwise reach the
                        # formatting in log_step as None and raise there.
                        reward = float(result.get("reward") or 0.0)
                        done = result.get("done", False)
                        error = None
                        observation = result["observation"]
                    except Exception as exc:
                        # Best effort: a failed step ends the episode and is
                        # reported in the [STEP] record instead of crashing.
                        reward = 0.0
                        done = True
                        error = str(exc)

                    rewards.append(reward)
                    steps_taken = step
                    log_step(step=step, action=action_text, reward=reward, done=done, error=error)
                    history.append(f"step={step} reward={reward:.3f}")

                    if done:
                        break
        finally:
            # Always close the record opened by log_start, even when the reset
            # request or the model call raised; the exception still propagates.
            score = min(max(rewards[-1] if rewards else 0.0, 0.0), 1.0)
            success = score >= SUCCESS_SCORE_THRESHOLD
            log_end(success=success, steps=steps_taken, rewards=rewards)
|
| 120 |
|
| 121 |
|
| 122 |
if __name__ == "__main__":
|
openenv.yaml
CHANGED
|
@@ -15,7 +15,7 @@ observation:
|
|
| 15 |
class_name: PyTorchDebugObservation
|
| 16 |
module: src.pytorch_debug_env.models
|
| 17 |
|
| 18 |
-
default_image: pytorch-debug-env
|
| 19 |
spec_version: 1
|
| 20 |
|
| 21 |
tags:
|
|
|
|
| 15 |
class_name: PyTorchDebugObservation
|
| 16 |
module: src.pytorch_debug_env.models
|
| 17 |
|
| 18 |
+
default_image: https://archcoder-pytorch-debug-env.hf.space
|
| 19 |
spec_version: 1
|
| 20 |
|
| 21 |
tags:
|
pyproject.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
# "setuptools.build_meta" is the PEP 517 backend shipped with setuptools.
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "pytorch-debug-env"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
requires-python = ">=3.11"
|
| 9 |
+
dependencies = [
|
| 10 |
+
"fastapi==0.115.0",
|
| 11 |
+
"uvicorn[standard]==0.30.6",
|
| 12 |
+
"pydantic==2.9.2",
|
| 13 |
+
"numpy==2.1.1",
|
| 14 |
+
"openai==1.51.0",
|
| 15 |
+
"httpx==0.27.2",
|
| 16 |
+
"openenv>=0.1.0",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
[tool.setuptools.packages.find]
|
| 20 |
+
where = ["src"]
|
| 21 |
+
|
| 22 |
+
[tool.pytest.ini_options]
|
| 23 |
+
asyncio_mode = "auto"
|