Priyansh Saxena committed on
Commit ·
be50021
1
Parent(s): 1435892
fix: roundoff issue
Browse files
Signed-off-by: Priyansh Saxena <priyena.programming@gmail.com>
- inference.py +3 -2
- src/pytorch_debug_env/reward.py +1 -1
inference.py
CHANGED
|
@@ -16,6 +16,7 @@ MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
|
|
| 16 |
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
|
| 17 |
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
SEED = os.environ.get("SEED")
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
def _parse_seed(value: str | None) -> int | None:
|
|
@@ -114,7 +115,7 @@ async def _run_task(task: str, client: OpenAI) -> None:
|
|
| 114 |
try:
|
| 115 |
action_text = get_model_message(client, observation, history)
|
| 116 |
except Exception as exc:
|
| 117 |
-
reward =
|
| 118 |
done = True
|
| 119 |
error = f"model_error: {exc}"
|
| 120 |
rewards.append(reward)
|
|
@@ -136,7 +137,7 @@ async def _run_task(task: str, client: OpenAI) -> None:
|
|
| 136 |
error = result.get("error")
|
| 137 |
observation = result.get("observation", observation)
|
| 138 |
except Exception as exc:
|
| 139 |
-
reward =
|
| 140 |
done = True
|
| 141 |
error = f"step_error: {exc}"
|
| 142 |
|
|
|
|
| 16 |
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
|
| 17 |
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
|
| 18 |
SEED = os.environ.get("SEED")
|
| 19 |
+
MIN_LOG_REWARD = 0.01
|
| 20 |
|
| 21 |
|
| 22 |
def _parse_seed(value: str | None) -> int | None:
|
|
|
|
| 115 |
try:
|
| 116 |
action_text = get_model_message(client, observation, history)
|
| 117 |
except Exception as exc:
|
| 118 |
+
reward = MIN_LOG_REWARD
|
| 119 |
done = True
|
| 120 |
error = f"model_error: {exc}"
|
| 121 |
rewards.append(reward)
|
|
|
|
| 137 |
error = result.get("error")
|
| 138 |
observation = result.get("observation", observation)
|
| 139 |
except Exception as exc:
|
| 140 |
+
reward = MIN_LOG_REWARD
|
| 141 |
done = True
|
| 142 |
error = f"step_error: {exc}"
|
| 143 |
|
src/pytorch_debug_env/reward.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
| 3 |
|
| 4 |
from .bug_library import BUG_CATEGORIES
|
| 5 |
|
| 6 |
-
EPSILON = 1e-
|
| 7 |
|
| 8 |
|
| 9 |
def clamp_score(value: float) -> float:
|
|
|
|
| 3 |
|
| 4 |
from .bug_library import BUG_CATEGORIES
|
| 5 |
|
| 6 |
+
EPSILON = 1e-2
|
| 7 |
|
| 8 |
|
| 9 |
def clamp_score(value: float) -> float:
|