Spaces:
Running
Running
Harden HF job token wiring and persist full training outputs
Pass a real HF token secret into Jobs, push full trained weights/tokenizer, and upload run artifacts to the Space repo. Add explicit hard-task base-vs-trained reward evaluation so performance deltas are saved with graphs and metrics.
- launch_job.py +10 -1
- ultimate_sota_training.py +84 -33
launch_job.py
CHANGED
|
@@ -27,6 +27,7 @@ import os
|
|
| 27 |
import shlex
|
| 28 |
|
| 29 |
from huggingface_hub import HfApi
|
|
|
|
| 30 |
|
| 31 |
_DEFAULT_REPO = "https://huggingface.co/spaces/md896/sql-debug-env"
|
| 32 |
_REPO_URL = os.environ.get("TRAIN_REPO_GIT_URL", _DEFAULT_REPO)
|
|
@@ -45,8 +46,13 @@ _IMAGE = os.environ.get(
|
|
| 45 |
_NAMESPACE = os.environ.get("HF_JOB_NAMESPACE")
|
| 46 |
|
| 47 |
_SECRETS = None
|
|
|
|
| 48 |
if _SKIP_PUSH.strip().lower() not in ("1", "true", "yes"):
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# One line only — survives UI/API newline flattening.
|
| 52 |
_bash = (
|
|
@@ -65,6 +71,9 @@ _job_env = {
|
|
| 65 |
"ROWS_PER_TASK": _ROWS,
|
| 66 |
"GRPO_NUM_GENERATIONS": _NUM_GEN,
|
| 67 |
"SKIP_HUB_PUSH": _SKIP_PUSH,
|
|
|
|
|
|
|
|
|
|
| 68 |
}
|
| 69 |
|
| 70 |
if __name__ == "__main__":
|
|
|
|
| 27 |
import shlex
|
| 28 |
|
| 29 |
from huggingface_hub import HfApi
|
| 30 |
+
from huggingface_hub.utils import get_token
|
| 31 |
|
| 32 |
_DEFAULT_REPO = "https://huggingface.co/spaces/md896/sql-debug-env"
|
| 33 |
_REPO_URL = os.environ.get("TRAIN_REPO_GIT_URL", _DEFAULT_REPO)
|
|
|
|
| 46 |
_NAMESPACE = os.environ.get("HF_JOB_NAMESPACE")
|
| 47 |
|
| 48 |
# Resolve a usable HF token from the environment, falling back to the local
# huggingface_hub credential cache, and — unless pushing is disabled — forward
# it to the Job as its HF_TOKEN secret.
_SECRETS = None
_local_hf_token = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    or get_token()
)
_push_disabled = _SKIP_PUSH.strip().lower() in ("1", "true", "yes")
if not _push_disabled:
    if _local_hf_token:
        _SECRETS = {"HF_TOKEN": _local_hf_token}
    else:
        # Job can still train; push/upload steps in script will gracefully skip/fail with clear logs.
        _SECRETS = None
|
| 56 |
|
| 57 |
# One line only — survives UI/API newline flattening.
|
| 58 |
_bash = (
|
|
|
|
| 71 |
"ROWS_PER_TASK": _ROWS,
|
| 72 |
"GRPO_NUM_GENERATIONS": _NUM_GEN,
|
| 73 |
"SKIP_HUB_PUSH": _SKIP_PUSH,
|
| 74 |
+
"ARTIFACT_SPACE_ID": os.environ.get("ARTIFACT_SPACE_ID", "md896/sql-debug-env"),
|
| 75 |
+
"MODEL_HUB_REPO_ID": os.environ.get("MODEL_HUB_REPO_ID", "md896/sql-debug-agent-qwen05b-grpo"),
|
| 76 |
+
"HARD_EVAL_SAMPLES": os.environ.get("HARD_EVAL_SAMPLES", "16"),
|
| 77 |
}
|
| 78 |
|
| 79 |
if __name__ == "__main__":
|
ultimate_sota_training.py
CHANGED
|
@@ -118,6 +118,7 @@ bootstrap_deps()
|
|
| 118 |
import httpx
|
| 119 |
import torch
|
| 120 |
from datasets import Dataset
|
|
|
|
| 121 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 122 |
from trl import GRPOConfig, GRPOTrainer
|
| 123 |
|
|
@@ -188,6 +189,24 @@ def make_real_dataset() -> Dataset:
|
|
| 188 |
return Dataset.from_list(rows)
|
| 189 |
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
# --- 3. One live OpenEnv reward (colab_real_world style) ---
|
| 192 |
|
| 193 |
|
|
@@ -223,6 +242,35 @@ def openenv_sql_reward_func(completions, task_id, **kwargs):
|
|
| 223 |
return rewards
|
| 224 |
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
# --- 3b. ARTIFACTS / PLOTS (REAL, FROM LOGS) ---
|
| 227 |
|
| 228 |
@dataclass(frozen=True)
|
|
@@ -372,33 +420,12 @@ def run_sota_train():
|
|
| 372 |
|
| 373 |
train_dataset = make_real_dataset()
|
| 374 |
|
| 375 |
-
def quick_exec_eval(max_items: int = 8) -> float:
|
| 376 |
-
"""Sample prompts, generate completions, score with the same OpenEnv SQL reward."""
|
| 377 |
-
subset = train_dataset.select(range(min(max_items, len(train_dataset))))
|
| 378 |
-
prompts = subset["prompt"]
|
| 379 |
-
task_ids = subset["task_id"]
|
| 380 |
-
|
| 381 |
-
completions: List[str] = []
|
| 382 |
-
for prompt in prompts:
|
| 383 |
-
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 384 |
-
with torch.no_grad():
|
| 385 |
-
out = model.generate(
|
| 386 |
-
**inputs,
|
| 387 |
-
max_new_tokens=256,
|
| 388 |
-
do_sample=True,
|
| 389 |
-
temperature=float(os.environ.get("EVAL_TEMPERATURE", "0.7")),
|
| 390 |
-
top_p=float(os.environ.get("EVAL_TOP_P", "0.9")),
|
| 391 |
-
renormalize_logits=True,
|
| 392 |
-
remove_invalid_values=True,
|
| 393 |
-
pad_token_id=tokenizer.eos_token_id,
|
| 394 |
-
)
|
| 395 |
-
completions.append(tokenizer.decode(out[0], skip_special_tokens=True))
|
| 396 |
-
|
| 397 |
-
rewards = openenv_sql_reward_func(completions, task_ids)
|
| 398 |
-
return float(sum(rewards) / max(len(rewards), 1))
|
| 399 |
-
|
| 400 |
print("Quick baseline eval (pre-train)...")
|
| 401 |
-
baseline_avg_reward =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
report_to = _resolve_report_to()
|
| 404 |
tb_dir = Path(out_dir) / "tensorboard"
|
|
@@ -452,7 +479,8 @@ def run_sota_train():
|
|
| 452 |
trainer.train()
|
| 453 |
|
| 454 |
print("Quick eval (post-train)...")
|
| 455 |
-
post_avg_reward =
|
|
|
|
| 456 |
|
| 457 |
# --- Save artifacts (real logs/plots) ---
|
| 458 |
artifacts = ArtifactPaths(root=Path(out_dir) / "artifacts")
|
|
@@ -474,6 +502,9 @@ def run_sota_train():
|
|
| 474 |
"baseline_avg_reward": baseline_avg_reward,
|
| 475 |
"post_avg_reward": post_avg_reward,
|
| 476 |
"delta_avg_reward": post_avg_reward - baseline_avg_reward,
|
|
|
|
|
|
|
|
|
|
| 477 |
"tensorboard_dir": str(tb_dir) if report_to == "tensorboard" else None,
|
| 478 |
"report_to": report_to,
|
| 479 |
}
|
|
@@ -497,20 +528,40 @@ def run_sota_train():
|
|
| 497 |
except Exception as e:
|
| 498 |
print(f"Could not generate before/after plot: {e}")
|
| 499 |
|
| 500 |
-
|
| 501 |
-
print("\nSaving
|
| 502 |
-
model.save_pretrained(
|
| 503 |
|
| 504 |
-
hub_id = os.environ.get("HF_HUB_REPO_ID", "md896/
|
| 505 |
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
|
| 506 |
if os.environ.get("SKIP_HUB_PUSH", "").strip() in ("1", "true", "yes"):
|
| 507 |
print("SKIP_HUB_PUSH set — not pushing to Hub.")
|
| 508 |
else:
|
| 509 |
try:
|
| 510 |
model.push_to_hub(hub_id, token=token)
|
| 511 |
-
|
|
|
|
| 512 |
except Exception as e:
|
| 513 |
-
print(f"Hub push failed (set HF_TOKEN /
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
print(f"\nTraining artifacts under {artifacts.root}")
|
| 516 |
|
|
|
|
| 118 |
import httpx
|
| 119 |
import torch
|
| 120 |
from datasets import Dataset
|
| 121 |
+
from huggingface_hub import HfApi
|
| 122 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 123 |
from trl import GRPOConfig, GRPOTrainer
|
| 124 |
|
|
|
|
| 189 |
return Dataset.from_list(rows)
|
| 190 |
|
| 191 |
|
| 192 |
+
def make_task_dataset(task_id: str, rows_per_task: int) -> Dataset:
    """Build a single-task prompt dataset by resetting the OpenEnv bridge.

    Performs one live ``POST /reset`` for *task_id* to fetch the task
    description and the broken query, renders the repair prompt once, and
    returns a Dataset of ``max(1, rows_per_task)`` identical rows, each
    carrying the prompt and the task id.
    """
    bridge_url = get_bridge_url()
    request_timeout = get_request_timeout()
    sql_marker = os.environ.get("COMPLETION_SQL_MARKER", "Fixed SQL:")

    # One live bridge call supplies everything the prompt needs.
    with httpx.Client(base_url=bridge_url, headers=BYPASS_HEADERS, timeout=request_timeout) as client:
        response = client.post("/reset", json={"task_id": task_id})
        response.raise_for_status()
        observation = response.json()["observation"]

    repair_prompt = (
        "Fix the following SQL query and provide only the fixed SQL.\n"
        f"Task: {observation['task_description']}\n"
        f"Broken Query: {observation['original_query']}\n"
        f"{sql_marker}"
    )
    row_count = max(1, rows_per_task)
    return Dataset.from_list(
        [{"prompt": repair_prompt, "task_id": task_id} for _ in range(row_count)]
    )
|
| 208 |
+
|
| 209 |
+
|
| 210 |
# --- 3. One live OpenEnv reward (colab_real_world style) ---
|
| 211 |
|
| 212 |
|
|
|
|
| 242 |
return rewards
|
| 243 |
|
| 244 |
|
| 245 |
+
def eval_model_reward(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    dataset: Dataset,
    *,
    max_items: int,
) -> float:
    """Return the average OpenEnv SQL reward of *model* over up to *max_items* rows.

    Generates one sampled completion per prompt (temperature/top_p read from the
    EVAL_TEMPERATURE / EVAL_TOP_P env vars, defaulting to 0.7 / 0.9) and scores
    the decoded texts with ``openenv_sql_reward_func``. The divisor is clamped
    to 1, so an empty selection yields 0.0 instead of raising.
    """
    subset = dataset.select(range(min(max_items, len(dataset))))
    prompts = subset["prompt"]
    task_ids = subset["task_id"]
    completions: List[str] = []
    for prompt in prompts:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=float(os.environ.get("EVAL_TEMPERATURE", "0.7")),
                top_p=float(os.environ.get("EVAL_TOP_P", "0.9")),
                # Guard sampling against NaN/inf logits from a freshly-trained model.
                renormalize_logits=True,
                remove_invalid_values=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # NOTE(review): decoding out[0] includes the prompt prefix; presumably
        # the reward function extracts the SQL after the marker — confirm.
        completions.append(tokenizer.decode(out[0], skip_special_tokens=True))
    rewards = openenv_sql_reward_func(completions, task_ids)
    return float(sum(rewards) / max(len(rewards), 1))
|
| 272 |
+
|
| 273 |
+
|
| 274 |
# --- 3b. ARTIFACTS / PLOTS (REAL, FROM LOGS) ---
|
| 275 |
|
| 276 |
@dataclass(frozen=True)
|
|
|
|
| 420 |
|
| 421 |
train_dataset = make_real_dataset()
|
| 422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
print("Quick baseline eval (pre-train)...")
|
| 424 |
+
baseline_avg_reward = eval_model_reward(model, tokenizer, train_dataset, max_items=8)
|
| 425 |
+
|
| 426 |
+
hard_eval_n = int(os.environ.get("HARD_EVAL_SAMPLES", "16"))
|
| 427 |
+
hard_dataset = make_task_dataset("hard_finance_explosion", rows_per_task=hard_eval_n)
|
| 428 |
+
base_hard_reward = eval_model_reward(model, tokenizer, hard_dataset, max_items=hard_eval_n)
|
| 429 |
|
| 430 |
report_to = _resolve_report_to()
|
| 431 |
tb_dir = Path(out_dir) / "tensorboard"
|
|
|
|
| 479 |
trainer.train()
|
| 480 |
|
| 481 |
print("Quick eval (post-train)...")
|
| 482 |
+
post_avg_reward = eval_model_reward(model, tokenizer, train_dataset, max_items=8)
|
| 483 |
+
trained_hard_reward = eval_model_reward(model, tokenizer, hard_dataset, max_items=hard_eval_n)
|
| 484 |
|
| 485 |
# --- Save artifacts (real logs/plots) ---
|
| 486 |
artifacts = ArtifactPaths(root=Path(out_dir) / "artifacts")
|
|
|
|
| 502 |
"baseline_avg_reward": baseline_avg_reward,
|
| 503 |
"post_avg_reward": post_avg_reward,
|
| 504 |
"delta_avg_reward": post_avg_reward - baseline_avg_reward,
|
| 505 |
+
"base_hard_reward": base_hard_reward,
|
| 506 |
+
"trained_hard_reward": trained_hard_reward,
|
| 507 |
+
"delta_hard_reward": trained_hard_reward - base_hard_reward,
|
| 508 |
"tensorboard_dir": str(tb_dir) if report_to == "tensorboard" else None,
|
| 509 |
"report_to": report_to,
|
| 510 |
}
|
|
|
|
| 528 |
except Exception as e:
|
| 529 |
print(f"Could not generate before/after plot: {e}")
|
| 530 |
|
| 531 |
+
model_dir = os.environ.get("MODEL_SAVE_DIR", "./sota_sql_agent_full")
|
| 532 |
+
print("\nSaving trained model locally...")
|
| 533 |
+
model.save_pretrained(model_dir)
|
| 534 |
|
| 535 |
+
hub_id = os.environ.get("MODEL_HUB_REPO_ID", os.environ.get("HF_HUB_REPO_ID", "md896/sql-debug-agent-qwen05b-grpo"))
|
| 536 |
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
|
| 537 |
if os.environ.get("SKIP_HUB_PUSH", "").strip() in ("1", "true", "yes"):
|
| 538 |
print("SKIP_HUB_PUSH set — not pushing to Hub.")
|
| 539 |
else:
|
| 540 |
try:
|
| 541 |
model.push_to_hub(hub_id, token=token)
|
| 542 |
+
tokenizer.push_to_hub(hub_id, token=token)
|
| 543 |
+
print(f"Pushed trained model to https://huggingface.co/{hub_id}")
|
| 544 |
except Exception as e:
|
| 545 |
+
print(f"Hub push failed (set HF_TOKEN / MODEL_HUB_REPO_ID or SKIP_HUB_PUSH=1): {e}")
|
| 546 |
+
|
| 547 |
+
# Upload run artifacts back to the Space repo so you can download/view them.
|
| 548 |
+
artifact_space = os.environ.get("ARTIFACT_SPACE_ID", "md896/sql-debug-env")
|
| 549 |
+
run_tag = time.strftime("%Y%m%d-%H%M%S")
|
| 550 |
+
try:
|
| 551 |
+
if token:
|
| 552 |
+
api = HfApi(token=token)
|
| 553 |
+
api.upload_folder(
|
| 554 |
+
repo_id=artifact_space,
|
| 555 |
+
repo_type="space",
|
| 556 |
+
folder_path=str(artifacts.root),
|
| 557 |
+
path_in_repo=f"artifacts/runs/{run_tag}",
|
| 558 |
+
commit_message=f"Add training artifacts {run_tag}",
|
| 559 |
+
)
|
| 560 |
+
print(f"Uploaded artifacts to https://huggingface.co/spaces/{artifact_space}/tree/main/artifacts/runs/{run_tag}")
|
| 561 |
+
else:
|
| 562 |
+
print("No HF token in job env; skipping artifact upload.")
|
| 563 |
+
except Exception as e:
|
| 564 |
+
print(f"Artifact upload failed: {e}")
|
| 565 |
|
| 566 |
print(f"\nTraining artifacts under {artifacts.root}")
|
| 567 |
|