md896 committed on
Commit
9552aaf
·
1 Parent(s): 6083a40

Harden HF job token wiring and persist full training outputs

Browse files

Pass a real HF token secret into Jobs, push full trained weights/tokenizer, and upload run artifacts to the Space repo. Add explicit hard-task base-vs-trained reward evaluation so performance deltas are saved with graphs and metrics.

Files changed (2) hide show
  1. launch_job.py +10 -1
  2. ultimate_sota_training.py +84 -33
launch_job.py CHANGED
@@ -27,6 +27,7 @@ import os
27
  import shlex
28
 
29
  from huggingface_hub import HfApi
 
30
 
31
  _DEFAULT_REPO = "https://huggingface.co/spaces/md896/sql-debug-env"
32
  _REPO_URL = os.environ.get("TRAIN_REPO_GIT_URL", _DEFAULT_REPO)
@@ -45,8 +46,13 @@ _IMAGE = os.environ.get(
45
  _NAMESPACE = os.environ.get("HF_JOB_NAMESPACE")
46
 
47
  _SECRETS = None
 
48
  if _SKIP_PUSH.strip().lower() not in ("1", "true", "yes"):
49
- _SECRETS = {"HF_TOKEN": "HF_TOKEN"}
 
 
 
 
50
 
51
  # One line only — survives UI/API newline flattening.
52
  _bash = (
@@ -65,6 +71,9 @@ _job_env = {
65
  "ROWS_PER_TASK": _ROWS,
66
  "GRPO_NUM_GENERATIONS": _NUM_GEN,
67
  "SKIP_HUB_PUSH": _SKIP_PUSH,
 
 
 
68
  }
69
 
70
  if __name__ == "__main__":
 
27
  import shlex
28
 
29
  from huggingface_hub import HfApi
30
+ from huggingface_hub.utils import get_token
31
 
32
  _DEFAULT_REPO = "https://huggingface.co/spaces/md896/sql-debug-env"
33
  _REPO_URL = os.environ.get("TRAIN_REPO_GIT_URL", _DEFAULT_REPO)
 
46
  _NAMESPACE = os.environ.get("HF_JOB_NAMESPACE")
47
 
48
  _SECRETS = None
49
+ _local_hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or get_token()
50
  if _SKIP_PUSH.strip().lower() not in ("1", "true", "yes"):
51
+ if _local_hf_token:
52
+ _SECRETS = {"HF_TOKEN": _local_hf_token}
53
+ else:
54
+ # Job can still train; push/upload steps in script will gracefully skip/fail with clear logs.
55
+ _SECRETS = None
56
 
57
  # One line only — survives UI/API newline flattening.
58
  _bash = (
 
71
  "ROWS_PER_TASK": _ROWS,
72
  "GRPO_NUM_GENERATIONS": _NUM_GEN,
73
  "SKIP_HUB_PUSH": _SKIP_PUSH,
74
+ "ARTIFACT_SPACE_ID": os.environ.get("ARTIFACT_SPACE_ID", "md896/sql-debug-env"),
75
+ "MODEL_HUB_REPO_ID": os.environ.get("MODEL_HUB_REPO_ID", "md896/sql-debug-agent-qwen05b-grpo"),
76
+ "HARD_EVAL_SAMPLES": os.environ.get("HARD_EVAL_SAMPLES", "16"),
77
  }
78
 
79
  if __name__ == "__main__":
ultimate_sota_training.py CHANGED
@@ -118,6 +118,7 @@ bootstrap_deps()
118
  import httpx
119
  import torch
120
  from datasets import Dataset
 
121
  from transformers import AutoModelForCausalLM, AutoTokenizer
122
  from trl import GRPOConfig, GRPOTrainer
123
 
@@ -188,6 +189,24 @@ def make_real_dataset() -> Dataset:
188
  return Dataset.from_list(rows)
189
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  # --- 3. One live OpenEnv reward (colab_real_world style) ---
192
 
193
 
@@ -223,6 +242,35 @@ def openenv_sql_reward_func(completions, task_id, **kwargs):
223
  return rewards
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  # --- 3b. ARTIFACTS / PLOTS (REAL, FROM LOGS) ---
227
 
228
  @dataclass(frozen=True)
@@ -372,33 +420,12 @@ def run_sota_train():
372
 
373
  train_dataset = make_real_dataset()
374
 
375
- def quick_exec_eval(max_items: int = 8) -> float:
376
- """Sample prompts, generate completions, score with the same OpenEnv SQL reward."""
377
- subset = train_dataset.select(range(min(max_items, len(train_dataset))))
378
- prompts = subset["prompt"]
379
- task_ids = subset["task_id"]
380
-
381
- completions: List[str] = []
382
- for prompt in prompts:
383
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
384
- with torch.no_grad():
385
- out = model.generate(
386
- **inputs,
387
- max_new_tokens=256,
388
- do_sample=True,
389
- temperature=float(os.environ.get("EVAL_TEMPERATURE", "0.7")),
390
- top_p=float(os.environ.get("EVAL_TOP_P", "0.9")),
391
- renormalize_logits=True,
392
- remove_invalid_values=True,
393
- pad_token_id=tokenizer.eos_token_id,
394
- )
395
- completions.append(tokenizer.decode(out[0], skip_special_tokens=True))
396
-
397
- rewards = openenv_sql_reward_func(completions, task_ids)
398
- return float(sum(rewards) / max(len(rewards), 1))
399
-
400
  print("Quick baseline eval (pre-train)...")
401
- baseline_avg_reward = quick_exec_eval()
 
 
 
 
402
 
403
  report_to = _resolve_report_to()
404
  tb_dir = Path(out_dir) / "tensorboard"
@@ -452,7 +479,8 @@ def run_sota_train():
452
  trainer.train()
453
 
454
  print("Quick eval (post-train)...")
455
- post_avg_reward = quick_exec_eval()
 
456
 
457
  # --- Save artifacts (real logs/plots) ---
458
  artifacts = ArtifactPaths(root=Path(out_dir) / "artifacts")
@@ -474,6 +502,9 @@ def run_sota_train():
474
  "baseline_avg_reward": baseline_avg_reward,
475
  "post_avg_reward": post_avg_reward,
476
  "delta_avg_reward": post_avg_reward - baseline_avg_reward,
 
 
 
477
  "tensorboard_dir": str(tb_dir) if report_to == "tensorboard" else None,
478
  "report_to": report_to,
479
  }
@@ -497,20 +528,40 @@ def run_sota_train():
497
  except Exception as e:
498
  print(f"Could not generate before/after plot: {e}")
499
 
500
- lora_dir = os.environ.get("LORA_SAVE_DIR", "./sota_sql_agent_unsloth")
501
- print("\nSaving LoRA weights locally...")
502
- model.save_pretrained(lora_dir)
503
 
504
- hub_id = os.environ.get("HF_HUB_REPO_ID", "md896/sota-sql-agent-7b")
505
  token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
506
  if os.environ.get("SKIP_HUB_PUSH", "").strip() in ("1", "true", "yes"):
507
  print("SKIP_HUB_PUSH set — not pushing to Hub.")
508
  else:
509
  try:
510
  model.push_to_hub(hub_id, token=token)
511
- print(f"Pushed LoRA to https://huggingface.co/{hub_id}")
 
512
  except Exception as e:
513
- print(f"Hub push failed (set HF_TOKEN / HF_HUB_REPO_ID or SKIP_HUB_PUSH=1): {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  print(f"\nTraining artifacts under {artifacts.root}")
516
 
 
118
  import httpx
119
  import torch
120
  from datasets import Dataset
121
+ from huggingface_hub import HfApi
122
  from transformers import AutoModelForCausalLM, AutoTokenizer
123
  from trl import GRPOConfig, GRPOTrainer
124
 
 
189
  return Dataset.from_list(rows)
190
 
191
 
192
+ def make_task_dataset(task_id: str, rows_per_task: int) -> Dataset:
193
+ bridge = get_bridge_url()
194
+ timeout = get_request_timeout()
195
+ marker = os.environ.get("COMPLETION_SQL_MARKER", "Fixed SQL:")
196
+ with httpx.Client(base_url=bridge, headers=BYPASS_HEADERS, timeout=timeout) as client:
197
+ resp = client.post("/reset", json={"task_id": task_id})
198
+ resp.raise_for_status()
199
+ obs = resp.json()["observation"]
200
+ prompt = (
201
+ "Fix the following SQL query and provide only the fixed SQL.\n"
202
+ f"Task: {obs['task_description']}\n"
203
+ f"Broken Query: {obs['original_query']}\n"
204
+ f"{marker}"
205
+ )
206
+ rows = [{"prompt": prompt, "task_id": task_id} for _ in range(max(1, rows_per_task))]
207
+ return Dataset.from_list(rows)
208
+
209
+
210
  # --- 3. One live OpenEnv reward (colab_real_world style) ---
211
 
212
 
 
242
  return rewards
243
 
244
 
245
+ def eval_model_reward(
246
+ model: AutoModelForCausalLM,
247
+ tokenizer: AutoTokenizer,
248
+ dataset: Dataset,
249
+ *,
250
+ max_items: int,
251
+ ) -> float:
252
+ subset = dataset.select(range(min(max_items, len(dataset))))
253
+ prompts = subset["prompt"]
254
+ task_ids = subset["task_id"]
255
+ completions: List[str] = []
256
+ for prompt in prompts:
257
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
258
+ with torch.no_grad():
259
+ out = model.generate(
260
+ **inputs,
261
+ max_new_tokens=256,
262
+ do_sample=True,
263
+ temperature=float(os.environ.get("EVAL_TEMPERATURE", "0.7")),
264
+ top_p=float(os.environ.get("EVAL_TOP_P", "0.9")),
265
+ renormalize_logits=True,
266
+ remove_invalid_values=True,
267
+ pad_token_id=tokenizer.eos_token_id,
268
+ )
269
+ completions.append(tokenizer.decode(out[0], skip_special_tokens=True))
270
+ rewards = openenv_sql_reward_func(completions, task_ids)
271
+ return float(sum(rewards) / max(len(rewards), 1))
272
+
273
+
274
  # --- 3b. ARTIFACTS / PLOTS (REAL, FROM LOGS) ---
275
 
276
  @dataclass(frozen=True)
 
420
 
421
  train_dataset = make_real_dataset()
422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  print("Quick baseline eval (pre-train)...")
424
+ baseline_avg_reward = eval_model_reward(model, tokenizer, train_dataset, max_items=8)
425
+
426
+ hard_eval_n = int(os.environ.get("HARD_EVAL_SAMPLES", "16"))
427
+ hard_dataset = make_task_dataset("hard_finance_explosion", rows_per_task=hard_eval_n)
428
+ base_hard_reward = eval_model_reward(model, tokenizer, hard_dataset, max_items=hard_eval_n)
429
 
430
  report_to = _resolve_report_to()
431
  tb_dir = Path(out_dir) / "tensorboard"
 
479
  trainer.train()
480
 
481
  print("Quick eval (post-train)...")
482
+ post_avg_reward = eval_model_reward(model, tokenizer, train_dataset, max_items=8)
483
+ trained_hard_reward = eval_model_reward(model, tokenizer, hard_dataset, max_items=hard_eval_n)
484
 
485
  # --- Save artifacts (real logs/plots) ---
486
  artifacts = ArtifactPaths(root=Path(out_dir) / "artifacts")
 
502
  "baseline_avg_reward": baseline_avg_reward,
503
  "post_avg_reward": post_avg_reward,
504
  "delta_avg_reward": post_avg_reward - baseline_avg_reward,
505
+ "base_hard_reward": base_hard_reward,
506
+ "trained_hard_reward": trained_hard_reward,
507
+ "delta_hard_reward": trained_hard_reward - base_hard_reward,
508
  "tensorboard_dir": str(tb_dir) if report_to == "tensorboard" else None,
509
  "report_to": report_to,
510
  }
 
528
  except Exception as e:
529
  print(f"Could not generate before/after plot: {e}")
530
 
531
+ model_dir = os.environ.get("MODEL_SAVE_DIR", "./sota_sql_agent_full")
532
+ print("\nSaving trained model locally...")
533
+ model.save_pretrained(model_dir)
534
 
535
+ hub_id = os.environ.get("MODEL_HUB_REPO_ID", os.environ.get("HF_HUB_REPO_ID", "md896/sql-debug-agent-qwen05b-grpo"))
536
  token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
537
  if os.environ.get("SKIP_HUB_PUSH", "").strip() in ("1", "true", "yes"):
538
  print("SKIP_HUB_PUSH set — not pushing to Hub.")
539
  else:
540
  try:
541
  model.push_to_hub(hub_id, token=token)
542
+ tokenizer.push_to_hub(hub_id, token=token)
543
+ print(f"Pushed trained model to https://huggingface.co/{hub_id}")
544
  except Exception as e:
545
+ print(f"Hub push failed (set HF_TOKEN / MODEL_HUB_REPO_ID or SKIP_HUB_PUSH=1): {e}")
546
+
547
+ # Upload run artifacts back to the Space repo so you can download/view them.
548
+ artifact_space = os.environ.get("ARTIFACT_SPACE_ID", "md896/sql-debug-env")
549
+ run_tag = time.strftime("%Y%m%d-%H%M%S")
550
+ try:
551
+ if token:
552
+ api = HfApi(token=token)
553
+ api.upload_folder(
554
+ repo_id=artifact_space,
555
+ repo_type="space",
556
+ folder_path=str(artifacts.root),
557
+ path_in_repo=f"artifacts/runs/{run_tag}",
558
+ commit_message=f"Add training artifacts {run_tag}",
559
+ )
560
+ print(f"Uploaded artifacts to https://huggingface.co/spaces/{artifact_space}/tree/main/artifacts/runs/{run_tag}")
561
+ else:
562
+ print("No HF token in job env; skipping artifact upload.")
563
+ except Exception as e:
564
+ print(f"Artifact upload failed: {e}")
565
 
566
  print(f"\nTraining artifacts under {artifacts.root}")
567