| |
| """Build and optionally deploy the final PolyGuard artifact Space. |
| |
| The script is intentionally packaging-only: it does not train or modify model |
| weights. It mirrors the best tracked evidence into docs/results, packages the |
| available model artifacts into a separate Hugging Face Space, and records |
| missing artifacts honestly in a manifest. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import html |
| import json |
| import os |
| from pathlib import Path |
| import shutil |
| from typing import Any |
|
|
| import matplotlib |
|
|
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
|
|
| from huggingface_hub import HfApi |
|
|
|
|
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Space that receives the bundle unless --space-id overrides it.
DEFAULT_SPACE_ID = "adithya9903/polyguard-openenv-final-artifacts"

# Where the curated evidence is mirrored, and where the Space bundle is staged.
DEFAULT_DOCS_DIR = ROOT.joinpath("docs", "results", "final_submission_evidence")
DEFAULT_SPACE_DIR = Path("/tmp/polyguard-final-artifact-space")

# Read-only inputs: tracked evidence, per-run sweep reports and checkpoints.
EVIDENCE_DIR = ROOT.joinpath("docs", "results", "submission_evidence_qwen_0_5b_1_5b_3b")
SWEEP_REPORT_DIR = ROOT.joinpath("outputs", "reports", "sweeps")
SWEEP_CHECKPOINT_DIR = ROOT.joinpath("checkpoints", "sweeps")
|
|
|
|
# Sweep runs that are packaged, keyed by run id. Each entry carries the display
# label used in tables and the model id recorded in the manifest.
RUNS = {
    run_id: {"label": label, "model_id": model_id}
    for run_id, label, model_id in (
        ("qwen-qwen2-5-0-5b-instruct", "Qwen 0.5B", "Qwen/Qwen2.5-0.5B-Instruct"),
        ("qwen-qwen2-5-1-5b-instruct", "Qwen 1.5B", "Qwen/Qwen2.5-1.5B-Instruct"),
        ("qwen-qwen2-5-3b-instruct", "Qwen 3B", "Qwen/Qwen2.5-3B-Instruct"),
    )
}
|
|
|
|
# Maps each numbered front-page chart file name to the existing chart it is
# copied from. Columns: target name, sub-folder of EVIDENCE_DIR/charts, source
# file name.
FRONTPAGE_CHARTS = {
    target_name: EVIDENCE_DIR / "charts" / chart_folder / source_name
    for target_name, chart_folder, source_name in (
        (
            "01_basic_llm_vs_full_pipeline_reward.png",
            "generated",
            "basic_llm_vs_full_pipeline_reward.png",
        ),
        (
            "02_reward_delta_by_seed.png",
            "generated",
            "basic_llm_vs_full_pipeline_reward_delta_by_seed.png",
        ),
        (
            "03_policy_ablation_reward.png",
            "generated",
            "policy_ablation_avg_reward.png",
        ),
        (
            "04_reward_components.png",
            "generated",
            "reward_component_bars.png",
        ),
        (
            "05_train_holdout_gap.png",
            "local_available_combined",
            "train_holdout_gap.png",
        ),
        (
            "06_inference_latency_validity.png",
            "local_available_combined",
            "inference_latency_validity.png",
        ),
        (
            "07_sft_vs_grpo_reward.png",
            "local_available_combined",
            "sft_vs_grpo_reward.png",
        ),
    )
}
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the packaging script.

    Returns:
        Namespace with ``space_id``, ``docs_dir``, ``space_dir`` (strings) and
        the boolean switches ``public``, ``deploy`` and ``skip_docs``.
    """
    cli = argparse.ArgumentParser(description="Deploy the final PolyGuard artifact Space.")

    # Options that take a value, each with a module-level default.
    valued_options = (
        ("--space-id", DEFAULT_SPACE_ID),
        ("--docs-dir", str(DEFAULT_DOCS_DIR)),
        ("--space-dir", str(DEFAULT_SPACE_DIR)),
    )
    for flag, fallback in valued_options:
        cli.add_argument(flag, default=fallback)

    # Boolean switches that carry help text.
    documented_switches = (
        ("--public", "Create/update the Space as public."),
        ("--deploy", "Upload the Space bundle to Hugging Face."),
    )
    for flag, description in documented_switches:
        cli.add_argument(flag, action="store_true", help=description)

    cli.add_argument("--skip-docs", action="store_true")
    return cli.parse_args()
|
|
|
|
def load_json(path: Path, default: Any) -> Any:
    """Best-effort JSON loader that falls back to *default*.

    Args:
        path: File expected to hold UTF-8 encoded JSON.
        default: Value returned when the file is missing or unusable.

    Returns:
        The decoded JSON value, or *default* when the file does not exist,
        disappears before it can be read, is not valid UTF-8, or is not valid
        JSON.
    """
    if not path.exists():
        return default
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (FileNotFoundError, UnicodeDecodeError, json.JSONDecodeError):
        # FileNotFoundError: the file vanished between the check and the read.
        # UnicodeDecodeError: the bytes are not UTF-8, which is just another
        # form of "corrupt report" and must not abort the packaging run.
        return default
|
|
|
|
def write_json(path: Path, payload: Any) -> None:
    """Serialize *payload* to *path* as indented, ASCII-only JSON.

    Missing parent directories are created first. The file ends with a single
    trailing newline.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, ensure_ascii=True, indent=2)
    path.write_text(f"{rendered}\n", encoding="utf-8")
|
|
|
|
def write_text(path: Path, text: str) -> None:
    """Write *text* to *path* as UTF-8, creating parent directories as needed."""
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text)
|
|
|
|
def copy_file(src: Path, dst: Path) -> bool:
    """Copy *src* to *dst* when *src* exists.

    Returns:
        True when the copy was made, False when *src* does not exist (in which
        case nothing is created).
    """
    if src.exists():
        # Create the destination folder only once we know there is something to copy.
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        return True
    return False
|
|
|
|
def copy_tree(src: Path, dst: Path) -> dict[str, Any]:
    """Replace *dst* with a filtered copy of the directory tree *src*.

    ``.DS_Store``, ``__pycache__`` and ``*.pyc`` entries are left out. Any
    existing *dst* is deleted before copying.

    Returns:
        ``{"exists", "file_count", "bytes"}`` describing the copied tree; all
        zero/False when *src* does not exist (and *dst* is left untouched).
    """
    if not src.exists():
        return {"exists": False, "file_count": 0, "bytes": 0}

    if dst.exists():
        shutil.rmtree(dst)
    skip_junk = shutil.ignore_patterns(".DS_Store", "__pycache__", "*.pyc")
    shutil.copytree(src, dst, ignore=skip_junk)

    # Measure what actually landed in the destination, not the source.
    copied_files = [entry for entry in dst.rglob("*") if entry.is_file()]
    total_bytes = 0
    for entry in copied_files:
        total_bytes += entry.stat().st_size
    return {"exists": True, "file_count": len(copied_files), "bytes": total_bytes}
|
|
|
|
def dir_size(path: Path) -> int:
    """Return the combined size in bytes of every file under *path*.

    A path that does not exist counts as 0.
    """
    total = 0
    if path.exists():
        for entry in path.rglob("*"):
            if entry.is_file():
                total += entry.stat().st_size
    return total
|
|
|
|
def summarize_artifact_dir(path: Path) -> dict[str, Any]:
    """Describe an artifact directory without copying it.

    The tree is walked once, so ``file_count`` and ``bytes`` always describe
    the same set of files.

    Args:
        path: Directory to inspect; it may be missing.

    Returns:
        ``{"exists", "file_count", "bytes"}``. A missing path yields
        ``exists=False`` with both counters at 0.
    """
    if not path.exists():
        return {"exists": False, "file_count": 0, "bytes": 0}
    files = [entry for entry in path.rglob("*") if entry.is_file()]
    return {
        "exists": True,
        "file_count": len(files),
        "bytes": sum(entry.stat().st_size for entry in files),
    }
|
|
|
|
def plot_model_reward(summary: dict[str, Any], path: Path) -> None:
    """Save a grouped bar chart of SFT vs GRPO reward for each model.

    Args:
        summary: Mapping whose ``"models"`` list holds entries with ``label``
            (or ``run_id``) and a ``metrics`` mapping.
        path: Output image file; parent directories are created.

    Nothing is written when *summary* lists no models. A model without a
    ``grpo_avg_env_reward`` gets a zero-height bar annotated "pending".
    """
    names: list[str] = []
    sft_scores: list[float] = []
    grpo_scores: list[float | None] = []
    for entry in summary.get("models", []):
        entry_metrics = entry.get("metrics", {})
        names.append(str(entry.get("label") or entry.get("run_id")))
        sft_scores.append(float(entry_metrics.get("sft_avg_env_reward") or 0.0))
        raw_grpo = entry_metrics.get("grpo_avg_env_reward")
        grpo_scores.append(None if raw_grpo is None else float(raw_grpo))

    if not names:
        return

    path.parent.mkdir(parents=True, exist_ok=True)
    positions = list(range(len(names)))
    bar_width = 0.35
    half_width = bar_width / 2
    sft_positions = [pos - half_width for pos in positions]
    grpo_positions = [pos + half_width for pos in positions]
    # Missing GRPO rewards are drawn as zero-height bars and labelled below.
    grpo_heights = [0.0 if score is None else score for score in grpo_scores]

    plt.figure(figsize=(9.5, 5))
    plt.bar(sft_positions, sft_scores, width=bar_width, label="SFT baseline")
    plt.bar(grpo_positions, grpo_heights, width=bar_width, label="GRPO policy")
    for pos, score in zip(positions, grpo_scores):
        if score is None:
            plt.text(pos + half_width, 0.025, "pending", ha="center", rotation=90, fontsize=8)
    plt.ylim(0, 1)
    plt.ylabel("Verifier reward")
    plt.title("SFT Baseline vs GRPO Policy Reward")
    plt.xticks(positions, names)
    plt.legend()
    plt.tight_layout()
    plt.savefig(path, dpi=180)
    plt.close()
|
|
|
|
def plot_sft_loss(summary: dict[str, Any], path: Path) -> None:
    """Save a bar chart of the final SFT training loss for each model.

    Args:
        summary: Mapping whose ``"models"`` list holds entries with ``label``
            (or ``run_id``) and a ``metrics`` mapping.
        path: Output image file; parent directories are created.

    Nothing is written when *summary* lists no models. A missing or falsy
    ``sft_train_loss`` is plotted as 0.0.
    """
    names: list[str] = []
    losses: list[float] = []
    for entry in summary.get("models", []):
        names.append(str(entry.get("label") or entry.get("run_id")))
        raw_loss = entry.get("metrics", {}).get("sft_train_loss")
        losses.append(float(raw_loss or 0.0))

    if not names:
        return

    path.parent.mkdir(parents=True, exist_ok=True)
    palette = ["#315f72", "#8a5a44", "#2f6f4e"]
    plt.figure(figsize=(9.5, 5))
    plt.bar(names, losses, color=palette[: len(names)])
    plt.ylabel("Final SFT train loss")
    plt.title("SFT Training Loss By Qwen Size")
    plt.tight_layout()
    plt.savefig(path, dpi=180)
    plt.close()
|
|
|
|
def plot_grpo_curve(
    history_path: Path,
    output: Path,
    window: int = 50,
    title: str = "Qwen 3B GRPO Reward Curve",
) -> None:
    """Plot per-step GRPO reward together with a trailing rolling mean.

    Args:
        history_path: JSON file holding a list of row mappings. Rows that are
            not mappings or have no ``reward`` are skipped. A row without a
            truthy ``step`` is placed at its 1-based position in the list.
        output: Output image file; parent directories are created.
        window: Number of trailing points averaged for the smoothed line.
        title: Chart title.

    Raises:
        ValueError: If *window* is smaller than 1.

    Nothing is written when the history is missing, unreadable, or contains no
    usable rows.
    """
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")

    rows = load_json(history_path, [])
    points = [
        (int(row.get("step") or idx + 1), float(row.get("reward")))
        for idx, row in enumerate(rows)
        if isinstance(row, dict) and row.get("reward") is not None
    ]
    if not points:
        return

    output.parent.mkdir(parents=True, exist_ok=True)
    steps, rewards = zip(*points)

    # Trailing mean: early points average over however many values exist so far.
    smooth = []
    for idx in range(len(rewards)):
        start = max(0, idx - window + 1)
        smooth.append(sum(rewards[start : idx + 1]) / (idx - start + 1))

    plt.figure(figsize=(10, 5))
    plt.plot(steps, rewards, alpha=0.18, label="step reward")
    plt.plot(steps, smooth, linewidth=2.0, label=f"rolling mean ({window})")
    plt.ylim(0, 1)
    plt.xlabel("GRPO step")
    plt.ylabel("Verifier reward")
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output, dpi=180)
    plt.close()
|
|
|
|
def artifact_availability() -> dict[str, Any]:
    """Report which artifacts exist on disk for every run in ``RUNS``.

    Returns:
        Mapping of run id to a record holding the run label and model id,
        directory summaries for the checkpoint tree, both adapters and the
        report folder, one boolean per expected report file, the list of
        missing adapter directories, and a ``status`` of ``"complete"`` or
        ``"reports_only_or_partial"``.
    """
    # Manifest key -> report file name looked up inside each run's report folder.
    report_files = (
        ("sft_report", "sft_trl_run.json"),
        ("grpo_report", "grpo_trl_run.json"),
        ("postsave_sft", "postsave_inference_sft.json"),
        ("postsave_grpo", "postsave_inference_grpo.json"),
        ("policy_ablation", "grpo_ablation_report.json"),
    )

    result: dict[str, Any] = {}
    for run_id, meta in RUNS.items():
        checkpoint_root = SWEEP_CHECKPOINT_DIR / run_id
        report_root = SWEEP_REPORT_DIR / run_id
        adapters = {
            stage: checkpoint_root / stage for stage in ("sft_adapter", "grpo_adapter")
        }

        record: dict[str, Any] = {
            "label": meta["label"],
            "model_id": meta["model_id"],
            "checkpoint_tree": summarize_artifact_dir(checkpoint_root),
        }
        for stage, folder in adapters.items():
            record[stage] = summarize_artifact_dir(folder)
        record["reports"] = summarize_artifact_dir(report_root)
        for key, file_name in report_files:
            record[key] = (report_root / file_name).exists()

        # A run only counts as complete when both adapter directories exist.
        absent = [stage for stage, folder in adapters.items() if not folder.exists()]
        record["missing_trained_files"] = absent
        record["status"] = "reports_only_or_partial" if absent else "complete"
        result[run_id] = record
    return result
|
|
|
|
def build_docs(docs_dir: Path, manifest: dict[str, Any]) -> None:
    """Rebuild the curated evidence folder at *docs_dir* from scratch.

    Any existing *docs_dir* is deleted. The function then renders the three
    generated charts, copies the front-page charts, every PNG from the tracked
    chart folders, and the selected reports, and finally writes
    ``manifest.json`` and ``README.md``. Source files that are missing are
    skipped.

    Args:
        docs_dir: Destination folder; wiped and recreated.
        manifest: Manifest written to ``manifest.json`` and used to render the
            README.

    Raises:
        ValueError: If *docs_dir* is, or contains, one of the source
            directories this function reads from. Wiping it would destroy the
            evidence before it could be mirrored.
    """
    # Guard the destructive rmtree below against a mis-pointed --docs-dir.
    target = docs_dir.resolve()
    for source_root in (EVIDENCE_DIR, SWEEP_REPORT_DIR):
        protected = source_root.resolve()
        if target == protected or target in protected.parents:
            raise ValueError(
                f"Refusing to wipe {docs_dir}: it contains the evidence source {source_root}"
            )

    if docs_dir.exists():
        shutil.rmtree(docs_dir)
    frontpage_dir = docs_dir / "charts" / "frontpage"
    all_charts_dir = docs_dir / "charts" / "all"
    reports_dir = docs_dir / "reports"
    for folder in (frontpage_dir, all_charts_dir, reports_dir):
        folder.mkdir(parents=True, exist_ok=True)

    # Charts rendered here from the tracked summary and the 3B GRPO history.
    summary = load_json(EVIDENCE_DIR / "submission_summary.json", {})
    plot_model_reward(summary, frontpage_dir / "00_sft_vs_grpo_reward_by_model.png")
    plot_sft_loss(summary, frontpage_dir / "08_sft_loss_by_model.png")
    plot_grpo_curve(
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_history.json",
        frontpage_dir / "09_qwen_3b_grpo_reward_curve.png",
    )

    # Pre-rendered front-page charts, renamed to their numbered targets.
    for name, source in FRONTPAGE_CHARTS.items():
        copy_file(source, frontpage_dir / name)

    # Every tracked PNG; a name present in both folders ends up as the copy
    # from the later folder.
    for source_dir in (
        EVIDENCE_DIR / "charts" / "generated",
        EVIDENCE_DIR / "charts" / "local_available_combined",
    ):
        if source_dir.exists():
            for item in sorted(source_dir.glob("*.png")):
                copy_file(item, all_charts_dir / item.name)

    report_sources = [
        EVIDENCE_DIR / "submission_summary.json",
        EVIDENCE_DIR / "reports" / "basic_llm_vs_polyguard_report.json",
        EVIDENCE_DIR / "reports" / "policy_ablation_report.json",
        EVIDENCE_DIR / "reports" / "basic_llm_failure_cases.md",
        EVIDENCE_DIR / "reports" / "action_traces.jsonl",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_trl_run.json",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "postsave_inference_grpo.json",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_ablation_report.json",
    ]
    for source in report_sources:
        copy_file(source, reports_dir / source.name)

    write_json(docs_dir / "manifest.json", manifest)
    write_text(docs_dir / "README.md", final_docs_readme(manifest))
|
|
|
|
def final_docs_readme(manifest: dict[str, Any]) -> str:
    """Render the README for the curated evidence folder.

    Args:
        manifest: Must provide ``artifact_availability`` and ``space_id``.
            ``basic_vs_pipeline`` is optional; metrics it lacks render as
            ``None``.

    Returns:
        Markdown text with one availability table row per run.
    """

    def presence(section: dict[str, Any]) -> str:
        # Table cell text for a directory summary.
        return "yes" if section["exists"] else "missing"

    table_rows = [
        f"| {entry['label']} | {presence(entry['sft_adapter'])} "
        f"| {presence(entry['grpo_adapter'])} | {presence(entry['checkpoint_tree'])} "
        f"| {presence(entry['reports'])} | {entry['status']} |"
        for entry in manifest["artifact_availability"].values()
    ]

    template = """# PolyGuard Final Submission Evidence

This folder is the current curated evidence set for the final submission. It
replaces the earlier Qwen 0.5B/1.5B-only view with a single location for the
best charts, reports, action traces, and model-artifact availability.

## Hugging Face Artifact Space

- Space: [{space_id}](https://huggingface.co/spaces/{space_id})
- Download command:

```bash
HF_TOKEN=<token> ./.venv/bin/hf download {space_id} --repo-type space --local-dir ./hf_final_artifacts
```

## Artifact Availability

| Model | SFT adapter | GRPO adapter | Checkpoints | Reports | Status |
| --- | --- | --- | --- | --- | --- |
{rows}

Qwen 0.5B and 1.5B currently have SFT histories/reports and post-save SFT
evidence in this repository, but no downloadable SFT/GRPO adapter directories
were present in the local checkout or authenticated artifact repos at packaging
time. Qwen 3B has both SFT and GRPO adapters, checkpoint metadata/intermediate
checkpoints, GRPO history, post-save GRPO inference, and policy ablation
evidence.

## Frontpage Charts

- `charts/frontpage/00_sft_vs_grpo_reward_by_model.png`
- `charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png`
- `charts/frontpage/02_reward_delta_by_seed.png`
- `charts/frontpage/03_policy_ablation_reward.png`
- `charts/frontpage/04_reward_components.png`
- `charts/frontpage/05_train_holdout_gap.png`
- `charts/frontpage/06_inference_latency_validity.png`
- `charts/frontpage/07_sft_vs_grpo_reward.png`
- `charts/frontpage/08_sft_loss_by_model.png`
- `charts/frontpage/09_qwen_3b_grpo_reward_curve.png`

## Improvement Evidence

- Basic LLM proxy vs full PolyGuard pipeline reward delta:
`{delta}` average reward.
- Full pipeline legality rate: `{pipeline_legality}`.
- Basic LLM failure/exploit rate: `{basic_failure_rate}`.
- Full pipeline failure/exploit rate: `{pipeline_failure_rate}`.

Reward values in the tracked API/reports remain numeric and clamped to
`[0.001, 0.999]` at three decimal precision.
"""
    space_id = manifest["space_id"]
    comparison = manifest.get("basic_vs_pipeline", {})
    return template.format(
        space_id=space_id,
        rows="\n".join(table_rows),
        delta=comparison.get("reward_delta"),
        pipeline_legality=comparison.get("pipeline_legality"),
        basic_failure_rate=comparison.get("basic_failure_rate"),
        pipeline_failure_rate=comparison.get("pipeline_failure_rate"),
    )
|
|
|
|
def build_space(space_dir: Path, manifest: dict[str, Any]) -> None:
    """Assemble the static Space bundle in *space_dir*.

    Any existing *space_dir* is deleted. The bundle receives a README with the
    Space front matter, a ``.gitattributes`` with LFS rules, the manifest, a
    copy of the curated evidence folder, and for every run in ``RUNS`` whatever
    checkpoint tree, adapter directories and reports exist locally. Lastly
    ``index.html`` is written.

    Args:
        space_dir: Staging directory; wiped and recreated.
        manifest: Manifest to embed. ``manifest["docs_dir"]`` names the
            evidence folder to copy.
    """
    if space_dir.exists():
        shutil.rmtree(space_dir)
    space_dir.mkdir(parents=True)
    write_text(
        space_dir / "README.md",
        """---
title: PolyGuard Final Artifacts
sdk: static
pinned: false
---

# PolyGuard Final Artifacts

This Space stores the final PolyGuard evidence bundle and the available trained
adapter artifacts. It is separate from the training Spaces and does not run
training.

Open `index.html` or inspect the `artifacts/`, `reports/`, and `evidence/`
folders in the Space file browser.
""",
    )
    write_text(
        space_dir / ".gitattributes",
        """*.safetensors filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
""",
    )
    write_json(space_dir / "manifest.json", manifest)

    # main() records docs_dir relative to ROOT when it lies inside the
    # repository. Resolving that against the current working directory only
    # works when the script is launched from ROOT, so a relative path that is
    # not found from here is retried against ROOT instead of being silently
    # skipped by copy_tree.
    docs_source = Path(manifest["docs_dir"])
    if not docs_source.is_absolute() and not docs_source.exists():
        docs_source = ROOT / docs_source
    evidence_target = space_dir / "evidence" / "final_submission_evidence"
    copy_tree(docs_source, evidence_target)

    for run_id in RUNS:
        checkpoint_dir = SWEEP_CHECKPOINT_DIR / run_id
        report_dir = SWEEP_REPORT_DIR / run_id
        if checkpoint_dir.exists():
            copy_tree(checkpoint_dir, space_dir / "checkpoints" / run_id)
        # Adapters are additionally exposed under artifacts/ for direct download.
        for stage in ("sft_adapter", "grpo_adapter"):
            source = checkpoint_dir / stage
            if source.exists():
                copy_tree(source, space_dir / "artifacts" / run_id / stage)
        if report_dir.exists():
            copy_tree(report_dir, space_dir / "reports" / run_id)

    write_text(space_dir / "index.html", index_html(manifest))
|
|
|
|
def index_html(manifest: dict[str, Any]) -> str:
    """Render the landing page of the artifact Space.

    Args:
        manifest: Must provide ``artifact_availability``; each entry supplies
            ``label``, ``status`` and the four directory summaries.

    Returns:
        A complete HTML document with one table row per run. Labels and
        statuses are HTML-escaped.
    """

    def presence(section: dict[str, Any]) -> str:
        # Table cell text for a directory summary.
        return "available" if section["exists"] else "missing"

    table_rows = []
    for entry in manifest["artifact_availability"].values():
        cells = (
            html.escape(entry["label"]),
            presence(entry["sft_adapter"]),
            presence(entry["grpo_adapter"]),
            presence(entry["checkpoint_tree"]),
            presence(entry["reports"]),
            html.escape(entry["status"]),
        )
        table_rows.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")

    # Doubled braces keep the CSS literal when the template goes through str.format.
    page = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>PolyGuard Final Artifacts</title>
<style>
body {{ font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; margin: 40px; line-height: 1.5; color: #17212b; }}
table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
th, td {{ border-bottom: 1px solid #d8dee4; padding: 10px; text-align: left; }}
code {{ background: #f4f6f8; padding: 2px 5px; border-radius: 4px; }}
.grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 16px; }}
.panel {{ border: 1px solid #d8dee4; padding: 16px; border-radius: 6px; }}
</style>
</head>
<body>
<h1>PolyGuard Final Artifacts</h1>
<p>This Space stores the final evidence bundle and available trained adapters. It does not retrain models.</p>
<table>
<thead><tr><th>Model</th><th>SFT adapter</th><th>GRPO adapter</th><th>Checkpoints</th><th>Reports</th><th>Status</th></tr></thead>
<tbody>{rows}</tbody>
</table>
<div class="grid">
<div class="panel"><strong>Evidence</strong><br /><code>evidence/final_submission_evidence/</code></div>
<div class="panel"><strong>Adapters</strong><br /><code>artifacts/qwen-qwen2-5-3b-instruct/</code></div>
<div class="panel"><strong>Checkpoints</strong><br /><code>checkpoints/qwen-qwen2-5-3b-instruct/</code></div>
<div class="panel"><strong>Reports</strong><br /><code>reports/</code></div>
<div class="panel"><strong>Manifest</strong><br /><code>manifest.json</code></div>
</div>
</body>
</html>
"""
    return page.format(rows="\n".join(table_rows))
|
|
|
|
def deploy_space(space_id: str, space_dir: Path, public: bool) -> None:
    """Create the Space if needed and upload the staged bundle.

    Bundles of at most 100 MiB go up in a single ``upload_folder`` call. Larger
    bundles are uploaded without ``checkpoints/`` first; the checkpoint tree
    then follows in smaller commits, one per loose file and one per directory.

    Args:
        space_id: Target Space repository id.
        space_dir: Local bundle directory to upload.
        public: When False the Space is created as private.

    Raises:
        SystemExit: If the ``HF_TOKEN`` environment variable is not set.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise SystemExit("HF_TOKEN is required for --deploy")

    client = HfApi(token=hf_token)
    client.create_repo(
        repo_id=space_id,
        repo_type="space",
        space_sdk="static",
        private=not public,
        exist_ok=True,
    )

    repo = {"repo_id": space_id, "repo_type": "space"}
    skip_patterns = [".DS_Store", "**/.DS_Store", "__pycache__/*", "*.pyc", ".cache/*", ".cache/**"]

    def push_folder(folder: Path, repo_path: str, message: str) -> None:
        # Upload one directory to the given location inside the Space.
        client.upload_folder(
            folder_path=str(folder),
            path_in_repo=repo_path,
            commit_message=message,
            ignore_patterns=skip_patterns,
            **repo,
        )

    def push_loose_files(folder: Path, repo_path: str, message: str) -> None:
        # Upload the files directly inside *folder*, one commit each, in name order.
        for loose in sorted(child for child in folder.iterdir() if child.is_file()):
            client.upload_file(
                path_or_fileobj=str(loose),
                path_in_repo=f"{repo_path}/{loose.name}",
                commit_message=message,
                **repo,
            )

    if dir_size(space_dir) <= 100 * 1024 * 1024:
        client.upload_folder(
            folder_path=str(space_dir),
            commit_message="Upload PolyGuard final evidence and trained adapters",
            ignore_patterns=skip_patterns,
            **repo,
        )
        return

    # Large bundle: everything except checkpoints/ goes first.
    client.upload_folder(
        folder_path=str(space_dir),
        commit_message="Upload PolyGuard final evidence and adapters",
        ignore_patterns=skip_patterns + ["checkpoints/*", "checkpoints/**"],
        **repo,
    )

    checkpoint_root = space_dir / "checkpoints"
    for run_dir in sorted(child for child in checkpoint_root.glob("*") if child.is_dir()):
        run_prefix = f"checkpoints/{run_dir.name}"
        push_loose_files(run_dir, run_prefix, f"Upload {run_dir.name} checkpoint metadata")

        for subdir in sorted(child for child in run_dir.iterdir() if child.is_dir()):
            sub_prefix = f"{run_prefix}/{subdir.name}"
            nested_dirs = sorted(child for child in subdir.iterdir() if child.is_dir())
            if not nested_dirs:
                # Leaf directory: a single folder upload covers it.
                push_folder(subdir, sub_prefix, f"Upload {run_dir.name} {subdir.name}")
                continue
            # Directory holding further directories: its own files, then each child.
            push_loose_files(subdir, sub_prefix, f"Upload {run_dir.name} {subdir.name} metadata")
            for nested in nested_dirs:
                push_folder(
                    nested,
                    f"{sub_prefix}/{nested.name}",
                    f"Upload {run_dir.name} {subdir.name}/{nested.name}",
                )
|
|
|
|
def main() -> None:
    """Run the packaging pipeline: manifest, docs, Space bundle, optional deploy.

    The docs folder is rebuilt unless ``--skip-docs`` is given. The Space
    bundle is always rebuilt, and it is uploaded only with ``--deploy``. A
    short JSON status is printed at the end.
    """
    options = parse_args()
    docs_dir = Path(options.docs_dir)
    space_dir = Path(options.space_dir)
    space_id = options.space_id

    summary = load_json(EVIDENCE_DIR / "submission_summary.json", {})
    basic = load_json(EVIDENCE_DIR / "reports" / "basic_llm_vs_polyguard_report.json", {})
    per_policy = basic.get("summaries", {})
    baseline = per_policy.get("basic_llm", {})
    pipeline = per_policy.get("full_polyguard_pipeline", {})

    # Record the docs folder relative to the repository when it lives inside it.
    if docs_dir.is_relative_to(ROOT):
        docs_reference = docs_dir.relative_to(ROOT)
    else:
        docs_reference = docs_dir

    comparison = {
        "reward_delta": basic.get("pipeline_minus_basic_reward_delta"),
        "basic_reward": baseline.get("avg_reward"),
        "pipeline_reward": pipeline.get("avg_reward"),
        "basic_failure_rate": baseline.get("exploit_or_failure_rate"),
        "pipeline_failure_rate": pipeline.get("exploit_or_failure_rate"),
        "pipeline_legality": pipeline.get("legality_rate"),
    }
    manifest = {
        "status": "ok",
        "space_id": space_id,
        "space_url": f"https://huggingface.co/spaces/{space_id}",
        "docs_dir": str(docs_reference),
        "evidence_source": str(EVIDENCE_DIR.relative_to(ROOT)),
        "artifact_availability": artifact_availability(),
        "submission_models": summary.get("models", []),
        "basic_vs_pipeline": comparison,
        "download_command": (
            f"HF_TOKEN=<token> ./.venv/bin/hf download {space_id} "
            "--repo-type space --local-dir ./hf_final_artifacts"
        ),
        "notes": [
            "Packaging-only run; no retraining is performed.",
            "Qwen 3B has SFT and GRPO adapter directories plus checkpoint metadata/intermediate checkpoints in this artifact Space.",
            "Qwen 0.5B and 1.5B adapter directories were not present locally or in the checked artifact repos; reports remain included.",
        ],
    }

    if not options.skip_docs:
        build_docs(docs_dir, manifest)
        # Continue with the manifest as it was written to disk.
        manifest = load_json(docs_dir / "manifest.json", manifest)
    build_space(space_dir, manifest)

    if options.deploy:
        deploy_space(space_id, space_dir, public=options.public)

    outcome = {
        "status": "ok",
        "space_url": manifest["space_url"],
        "space_dir": str(space_dir),
        "docs_dir": str(docs_dir),
    }
    print(json.dumps(outcome, indent=2))
|
|
|
|
# Run the packaging pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|