feat: add baseline scores JSON and update inference.py for hackathon spec compliance
Browse files
- baseline_scores_heuristic.json +58 -0
- go.sum +0 -0
- python/inference.py +12 -3
baseline_scores_heuristic.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "meta-llama/llama-3.3-70b-instruct:free",
|
| 3 |
+
"api_base": "https://openrouter.ai/api/v1",
|
| 4 |
+
"episodes_per_task": 1,
|
| 5 |
+
"seed_base": 1000,
|
| 6 |
+
"fast_mode": true,
|
| 7 |
+
"llm_every": 4,
|
| 8 |
+
"max_steps": null,
|
| 9 |
+
"task_averages": {
|
| 10 |
+
"1": 0.708,
|
| 11 |
+
"2": 0.6328,
|
| 12 |
+
"3": 0.5983
|
| 13 |
+
},
|
| 14 |
+
"overall_average": 0.6463666666666666,
|
| 15 |
+
"all_results": [
|
| 16 |
+
{
|
| 17 |
+
"task_id": 1,
|
| 18 |
+
"seed": 1100,
|
| 19 |
+
"total_reward": 246.42219784256966,
|
| 20 |
+
"total_steps": 94,
|
| 21 |
+
"elapsed_sec": 1.5613129138946533,
|
| 22 |
+
"score": 0.708,
|
| 23 |
+
"sub_scores": {
|
| 24 |
+
"cost": 0.7079636116620143
|
| 25 |
+
},
|
| 26 |
+
"exploit_detected": false
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"task_id": 2,
|
| 30 |
+
"seed": 1200,
|
| 31 |
+
"total_reward": 242.81120610868118,
|
| 32 |
+
"total_steps": 95,
|
| 33 |
+
"elapsed_sec": 1.594855785369873,
|
| 34 |
+
"score": 0.6328,
|
| 35 |
+
"sub_scores": {
|
| 36 |
+
"cost": 0.7005224090103834,
|
| 37 |
+
"temperature": 0.53125
|
| 38 |
+
},
|
| 39 |
+
"exploit_detected": false
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"task_id": 3,
|
| 43 |
+
"seed": 1300,
|
| 44 |
+
"total_reward": 251.7133773862143,
|
| 45 |
+
"total_steps": 94,
|
| 46 |
+
"elapsed_sec": 1.6321852207183838,
|
| 47 |
+
"score": 0.5983,
|
| 48 |
+
"sub_scores": {
|
| 49 |
+
"batch_deadline": 1,
|
| 50 |
+
"carbon": 0.6563888726735232,
|
| 51 |
+
"cost": 0.6695079035324871,
|
| 52 |
+
"grid_response": 0.21428571428571427,
|
| 53 |
+
"temperature": 0.5833333333333334
|
| 54 |
+
},
|
| 55 |
+
"exploit_detected": false
|
| 56 |
+
}
|
| 57 |
+
]
|
| 58 |
+
}
|
go.sum
ADDED
|
File without changes
|
python/inference.py
CHANGED
|
@@ -47,9 +47,16 @@ except ImportError:
|
|
| 47 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 48 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/llama-3.3-70b-instruct:free")
|
| 49 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 52 |
-
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY"
|
|
|
|
|
|
|
| 53 |
DEFAULT_EPISODES = 1
|
| 54 |
DEFAULT_SEED_BASE = 1000
|
| 55 |
MAX_RETRIES = 3
|
|
@@ -143,9 +150,11 @@ class LLMAgent:
|
|
| 143 |
"""OpenAI-compatible LLM agent that chooses actions given observations."""
|
| 144 |
|
| 145 |
def __init__(self):
|
|
|
|
|
|
|
| 146 |
self.client = OpenAI(
|
| 147 |
base_url=API_BASE_URL,
|
| 148 |
-
api_key=OPENAI_API_KEY
|
| 149 |
)
|
| 150 |
self.model = MODEL_NAME
|
| 151 |
self.fallback_mode = False
|
|
|
|
| 47 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 48 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/llama-3.3-70b-instruct:free")
|
| 49 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
|
| 50 |
+
|
| 51 |
+
# ── Hackathon Spec Compliance: HF_TOKEN → OpenAI API Key ──────────────────
|
| 52 |
+
# Per hackathon spec, the LLM API credential is read from the HF_TOKEN environment variable
|
| 53 |
+
# and passed directly to the OpenAI client for initialization.
|
| 54 |
+
# Resolution order as implemented below: OPENAI_API_KEY wins when it is set (local testing/development)
|
| 55 |
+
# Otherwise HF_TOKEN is used (the credential named by the hackathon spec)
|
| 56 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 57 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or HF_TOKEN
|
| 58 |
+
if not OPENAI_API_KEY:
|
| 59 |
+
raise ValueError("HF_TOKEN or OPENAI_API_KEY environment variable is required")
|
| 60 |
DEFAULT_EPISODES = 1
|
| 61 |
DEFAULT_SEED_BASE = 1000
|
| 62 |
MAX_RETRIES = 3
|
|
|
|
| 150 |
"""OpenAI-compatible LLM agent that chooses actions given observations."""
|
| 151 |
|
| 152 |
def __init__(self):
|
| 153 |
+
# Initialize OpenAI client with credentials from HF_TOKEN (per hackathon spec)
|
| 154 |
+
# OPENAI_API_KEY holds the resolved credential: the OPENAI_API_KEY env var if set, otherwise the HF_TOKEN value
|
| 155 |
self.client = OpenAI(
|
| 156 |
base_url=API_BASE_URL,
|
| 157 |
+
api_key=OPENAI_API_KEY,
|
| 158 |
)
|
| 159 |
self.model = MODEL_NAME
|
| 160 |
self.fallback_mode = False
|