Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- inference.py +23 -1
inference.py
CHANGED
|
@@ -199,17 +199,39 @@ def run_task(task_id: str) -> float:
|
|
| 199 |
|
| 200 |
def main():
    """Run the task selected by TASK_ID, or every task in ALL_TASKS.

    If TASK_ID is truthy and is a member of ALL_TASKS, only that task is run
    and nothing else is printed by this function. Otherwise every task in
    ALL_TASKS is run in order and a single "[SUMMARY]" line with the task
    count and the mean score is printed.
    """
    specific_task = TASK_ID
    if specific_task and specific_task in ALL_TASKS:
        run_task(specific_task)
        return

    # Run all tasks so the validator sees graders for every task
    all_scores = []
    for t_id in ALL_TASKS:
        all_scores.append(run_task(t_id))

    # Guard the mean: an empty ALL_TASKS would otherwise raise
    # ZeroDivisionError instead of emitting the summary line.
    avg = sum(all_scores) / len(all_scores) if all_scores else 0.0
    print(f"[SUMMARY] tasks={len(ALL_TASKS)} avg_score={avg:.4f}", flush=True)
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
| 215 |
main()
|
|
|
|
| 199 |
|
| 200 |
def main():
    """Run the selected task(s) and save the scores to a JSON file.

    If TASK_ID is truthy and is a member of ALL_TASKS, only that task is run.
    Otherwise every task in ALL_TASKS is run in order, the mean score is
    stored under "avg_score", and a "[SUMMARY]" line is printed.

    The collected scores are then written to outputs/baseline_results.json.
    Saving is best-effort: any failure while creating the directory or
    writing the file is reported with a "[DEBUG]" line and does not abort
    the run.
    """
    specific_task = TASK_ID
    results_dir = "outputs"
    results_path = os.path.join(results_dir, "baseline_results.json")

    final_data = {
        "model": MODEL_NAME,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "tasks": {},
    }

    if specific_task and specific_task in ALL_TASKS:
        score = run_task(specific_task)
        final_data["tasks"][specific_task] = {"score": score}
    else:
        # Run all tasks so the validator sees graders for every task
        all_scores = []
        for t_id in ALL_TASKS:
            score = run_task(t_id)
            all_scores.append(score)
            final_data["tasks"][t_id] = {"score": score}

        # Guard the mean: an empty ALL_TASKS would otherwise raise
        # ZeroDivisionError before the summary and the results file.
        avg = sum(all_scores) / len(all_scores) if all_scores else 0.0
        final_data["avg_score"] = avg
        print(f"[SUMMARY] tasks={len(ALL_TASKS)} avg_score={avg:.4f}", flush=True)

    # Save to JSON for local tracking
    try:
        # Create the directory inside the try block so that an unwritable
        # working directory cannot prevent the tasks above from running.
        os.makedirs(results_dir, exist_ok=True)
        with open(results_path, "w", encoding="utf-8") as f:
            json.dump(final_data, f, indent=2)
        print(f"[DEBUG] Results saved to {results_path}", flush=True)
    except Exception as e:
        # Top-level boundary: local tracking must never fail the run.
        print(f"[DEBUG] Could not save progress to JSON: {e}", flush=True)
|
| 234 |
+
|
| 235 |
|
| 236 |
if __name__ == "__main__":
|
| 237 |
main()
|