Spaces:

Sizzing
/

aws_rl_env

Running

App Files Files Community

aws_rl_env / scripts /generate_blog_figures.py

Sizzing

Upload folder using huggingface_hub

884f069 verified 17 days ago

raw

history blame contribute delete

10.3 kB

	"""Generate the 4 new PNG figures embedded in blog.md.

	Outputs (idempotent):
	docs/figures/blog_hero.png
	docs/figures/tier_pyramid.png
	docs/figures/dataset_composition.png
	docs/figures/reward_components.png

	Run from repo root:
	.venv/bin/python scripts/generate_blog_figures.py
	"""

	from __future__ import annotations

	from pathlib import Path

	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatches
	from matplotlib.patches import FancyBboxPatch
	import numpy as np

	# Repository root: two levels up from this file (the script sits in scripts/).
	REPO_ROOT = Path(__file__).resolve().parents[1]
	FIG_DIR = REPO_ROOT.joinpath("docs", "figures")
	# Created at import time so every figure function can write into it directly.
	FIG_DIR.mkdir(exist_ok=True, parents=True)

	# Named colors shared by the figure functions.
	PINK, PINK_DARK = "#ff4f8b", "#c81b5a"
	INK, SLATE = "#1a1a1a", "#525a66"
	PAPER, GRID = "#fff7fa", "#e8d6df"

	# Categorical palette; the figure functions index into it by position.
	PALETTE = [
	    "#3a86ff",
	    "#8338ec",
	    "#ff006e",
	    "#fb5607",
	    "#ffbe0b",
	]


	def _save(fig: plt.Figure, name: str) -> None:
	    """Write *fig* into ``FIG_DIR`` under *name*, close it, and log the path.

	    Args:
	        fig: The figure to save; its own face color is used as the
	            saved background.
	        name: File name (including extension) inside ``FIG_DIR``.
	    """
	    target = FIG_DIR / name
	    fig.savefig(
	        target,
	        dpi=160,
	        bbox_inches="tight",
	        facecolor=fig.get_facecolor(),
	    )
	    # Close the figure once it has been written out.
	    plt.close(fig)
	    print(f"wrote {target.relative_to(REPO_ROOT)}")


	def hero() -> None:
	    """Render the hero banner and save it as ``blog_hero.png``.

	    The banner has three headline lines at the top-left, a date stamp at the
	    top-right, and a row of four rounded stat cards along the bottom, all on
	    the PAPER background with the axes hidden.
	    """
	    canvas_w, canvas_h = 12, 5.2
	    fig, ax = plt.subplots(figsize=(canvas_w, canvas_h))
	    fig.patch.set_facecolor(PAPER)
	    ax.set_facecolor(PAPER)
	    # Data limits use the same numbers as figsize, so positions below are
	    # expressed on a 12 x 5.2 grid.
	    ax.set_xlim(0, canvas_w)
	    ax.set_ylim(0, canvas_h)
	    ax.axis("off")

	    # Headline block: (y position, text, per-line text styling).
	    font = "DejaVu Sans"
	    headlines = (
	        (4.55, "AWS Cloud Operations RL",
	         {"fontsize": 15, "color": PINK_DARK, "fontweight": "bold"}),
	        (3.85, "From Cloud Chaos to Capable Agents",
	         {"fontsize": 30, "color": INK, "fontweight": "bold"}),
	        (3.25, "Training an LLM SRE on 120+ AWS Tasks with SFT \u2192 GRPO",
	         {"fontsize": 15, "color": SLATE, "style": "italic"}),
	    )
	    for line_y, line, look in headlines:
	        ax.text(0.45, line_y, line, family=font, **look)

	    # Stat cards: (large figure, small caption).
	    cards = [
	        ("120+", "AWS tasks\n5 tiers + drift"),
	        ("8\u00d7", "parallel rollouts\n1 GPU"),
	        ("8", "anti-hacking\nlayers"),
	        ("39\u219289%", "exact-match\npost-SFT"),
	    ]
	    card_w, card_h = 2.55, 2.1
	    spacing = 0.2
	    first_left, bottom = 0.45, 0.55
	    for idx, (figure_text, caption) in enumerate(cards):
	        left = first_left + idx * (card_w + spacing)
	        ax.add_patch(
	            FancyBboxPatch(
	                (left, bottom),
	                card_w,
	                card_h,
	                boxstyle="round,pad=0.04,rounding_size=0.18",
	                linewidth=1.5,
	                edgecolor=PINK,
	                facecolor="white",
	            )
	        )
	        mid_x = left + card_w / 2
	        # Large figure sits in the upper part of the card, caption below it.
	        ax.text(
	            mid_x, bottom + card_h * 0.62, figure_text,
	            fontsize=26, color=PINK_DARK, fontweight="bold",
	            ha="center", va="center",
	        )
	        ax.text(
	            mid_x, bottom + card_h * 0.22, caption,
	            fontsize=10.5, color=SLATE, ha="center", va="center",
	        )

	    # Right-aligned stamp on the same baseline as the first headline.
	    ax.text(
	        11.55, 4.55, "OpenEnv Hackathon \u2022 Apr 2026",
	        fontsize=10, color=SLATE, ha="right", style="italic",
	    )

	    _save(fig, "blog_hero.png")


	def tier_pyramid() -> None:
	    """Render the tier pyramid and drift sidebar as ``tier_pyramid.png``.

	    The left panel stacks one trapezoid per tier. Tiers are listed apex
	    first, so the first entry is drawn at the top (narrowest) and the last
	    at the bottom (widest). The right panel is a text sidebar with a "Drift"
	    card and a note on promotion paths.
	    """
	    # (tier name, task count, chaos label, strategy label, fill color),
	    # ordered from the apex down to the base.
	    bands = [
	        ("Expert", 24, "30%", "state_checks", PALETTE[2]),
	        ("Advanced", 25, "30%", "multi_step+services", PALETTE[1]),
	        ("Intermediate", 25, "20%", "multi_step", PALETTE[0]),
	        ("Beginner", 25, "10%", "resource_creation", "#06b6d4"),
	        ("Warmup", 25, "10%", "command_match", "#22c55e"),
	    ]
	    fig, (pyramid_ax, side_ax) = plt.subplots(
	        1, 2,
	        figsize=(14, 6),
	        gridspec_kw={"width_ratios": [3.2, 1]},
	    )
	    fig.patch.set_facecolor("white")

	    n = len(bands)
	    pyramid_ax.set_xlim(-1.15, 1.15)
	    pyramid_ax.set_ylim(-0.2, n + 0.4)
	    pyramid_ax.axis("off")
	    pyramid_ax.set_title(
	        "Curriculum: 124 tasks across 5 tiers",
	        fontsize=15, fontweight="bold", color=INK, pad=12,
	    )

	    def half_width(step: int) -> float:
	        # Half-width of the pyramid at the boundary `step` bands below the apex.
	        return 0.45 + 0.55 * (step / (n - 1))

	    for level, (name, count, chaos, strat, color) in enumerate(bands):
	        # level 0 is the apex band; each band is one unit tall.
	        upper = n - level
	        lower = n - level - 1
	        narrow = half_width(level)
	        wide = half_width(level + 1)
	        outline = [
	            (-wide, lower), (wide, lower),
	            (narrow, upper), (-narrow, upper),
	        ]
	        pyramid_ax.add_patch(
	            mpatches.Polygon(
	                outline,
	                closed=True,
	                facecolor=color,
	                edgecolor="white",
	                linewidth=2,
	                alpha=0.95,
	            )
	        )
	        centre = (upper + lower) / 2
	        # Tier name just above the band's centre, detail line just below.
	        pyramid_ax.text(
	            0, centre + 0.18, name,
	            fontsize=14, fontweight="bold",
	            color="white", ha="center", va="center",
	        )
	        pyramid_ax.text(
	            0, centre - 0.18,
	            f"{count} tasks \u00b7 chaos {chaos} \u00b7 {strat}",
	            fontsize=9.5, color="white", ha="center", va="center", alpha=0.97,
	        )

	    # Right panel: drift card and promotion-path note.
	    side_ax.set_xlim(0, 1)
	    side_ax.set_ylim(0, n + 0.4)
	    side_ax.axis("off")
	    side_ax.set_title(
	        "Adversarial track",
	        fontsize=13, fontweight="bold", color=INK, pad=12,
	    )

	    side_ax.add_patch(
	        FancyBboxPatch(
	            (0.08, 1.7), 0.84, 1.7,
	            boxstyle="round,pad=0.04,rounding_size=0.10",
	            facecolor=PINK, edgecolor=PINK_DARK, linewidth=2, alpha=0.92,
	        )
	    )
	    side_ax.text(
	        0.5, 3.0, "Drift",
	        fontsize=20, fontweight="bold", color="white", ha="center",
	    )
	    side_ax.text(0.5, 2.6, "9 tasks", fontsize=12, color="white", ha="center")
	    side_ax.text(
	        0.5, 2.05, "2\u20133 mutations\nrandomized\nper episode",
	        fontsize=9.5, color="white", ha="center", va="center",
	    )

	    side_ax.text(
	        0.5, 0.85,
	        "Promotion paths\n\u2014\nstandard: min episodes + rate\nfast-track: 3 consecutive \u22650.9",
	        fontsize=9, color=SLATE, ha="center", va="center",
	    )

	    _save(fig, "tier_pyramid.png")


	def dataset_composition() -> None:
	    """Render two donut charts of the SFT dataset as ``dataset_composition.png``.

	    The left donut shows the share of each trajectory type and the right one
	    the share of each tier. Shares are shown as percentages in a legend to
	    the right of each donut, and a footnote explains the missing expert tier.
	    """
	    trajectory_shares = {
	        "success": 55,
	        "continuation": 20,
	        "failure recovery": 15,
	        "verification": 5,
	        "hint usage": 5,
	    }
	    # The expert tier is left out on purpose: a 0% wedge would not be visible.
	    tier_shares = {
	        "warmup": 50,
	        "beginner": 30,
	        "intermediate": 15,
	        "advanced": 5,
	    }

	    fig, axes = plt.subplots(1, 2, figsize=(15.5, 6))
	    fig.patch.set_facecolor("white")
	    fig.suptitle(
	        "SFT dataset composition \u2022 1,500 rows",
	        fontsize=16, fontweight="bold", color=INK, y=1.02,
	    )
	    # Wide gap between the panels leaves room for the first donut's legend.
	    fig.subplots_adjust(wspace=0.7, left=0.04, right=0.96)

	    def draw_donut(ax, sizes, labels, title, colors, center_label):
	        """Draw one donut with a side legend and a label in the hole."""
	        ring_style = {"width": 0.42, "edgecolor": "white", "linewidth": 2}
	        wedges, _texts = ax.pie(
	            sizes,
	            labels=None,
	            colors=colors,
	            wedgeprops=ring_style,
	            startangle=90,
	        )
	        ax.set_title(title, fontsize=13, fontweight="bold", color=INK, pad=10)
	        entries = [
	            f"{label} \u2014 {share}%" for label, share in zip(labels, sizes)
	        ]
	        ax.legend(
	            wedges, entries,
	            loc="center left", bbox_to_anchor=(1.05, 0.5),
	            frameon=False, fontsize=11,
	        )
	        ax.text(
	            0, 0, center_label,
	            fontsize=14, fontweight="bold", color=INK,
	            ha="center", va="center",
	        )

	    draw_donut(
	        axes[0],
	        list(trajectory_shares.values()),
	        list(trajectory_shares.keys()),
	        "Trajectory types",
	        ["#22c55e", "#3a86ff", "#fb5607", "#8338ec", "#ffbe0b"],
	        "5 types",
	    )
	    draw_donut(
	        axes[1],
	        list(tier_shares.values()),
	        list(tier_shares.keys()),
	        "Tier weights",
	        ["#22c55e", "#06b6d4", PALETTE[0], PALETTE[1]],
	        "4 tiers\n+ expert*",
	    )

	    # Footnote for the asterisk in the right donut's centre label.
	    fig.text(
	        0.5, -0.04,
	        "* expert tasks excluded from SFT (randomized state checks \u2192 no canonical script). "
	        "GRPO handles them via live reward signal.",
	        fontsize=10, color=SLATE, ha="center", style="italic",
	    )

	    _save(fig, "dataset_composition.png")


	def reward_components() -> None:
	    """Render the reward-modifier bar chart as ``reward_components.png``.

	    Each modifier is one horizontal bar: "+" entries extend right of zero,
	    "-" entries extend left. Bars are colored by category and labelled with
	    their signed magnitude; the first entry is drawn at the top.
	    """
	    # (label, magnitude, sign, category)
	    components = [
	        ("task achieved", 1.00, "+", "achieve"),
	        ("chaos survival", 0.05, "+", "achieve"),
	        ("partial progress", 0.80, "+", "shape"),
	        ("progress delta", 0.10, "+", "shape"),
	        ("idempotent retry", 0.02, "+", "shape"),
	        ("rollback (per pair)", 0.10, "-", "penalty"),
	        ("command failed", 0.50, "-", "penalty"),
	        ("hint decay (n=3)", 0.39, "-", "penalty"),
	    ]
	    color_map = {
	        "achieve": "#22c55e",
	        "shape": PALETTE[0],
	        "penalty": PINK,
	    }

	    labels = [label for label, _, _, _ in components]
	    values = [
	        magnitude if sign == "+" else -magnitude
	        for _, magnitude, sign, _ in components
	    ]
	    colors = [color_map[category] for _, _, _, category in components]
	    signed = [f"{sign}{magnitude:.2f}" for _, magnitude, sign, _ in components]

	    fig, ax = plt.subplots(figsize=(11.5, 5.8))
	    fig.patch.set_facecolor("white")

	    # Reversed positions put the first component on the top row.
	    y_pos = np.arange(len(labels))[::-1]
	    ax.barh(
	        y_pos, values,
	        color=colors, edgecolor="white", linewidth=1.5,
	        height=0.72, alpha=0.92,
	    )

	    # Value captions sit just past the end of each bar, on the outer side.
	    for row, value, caption in zip(y_pos, values, signed):
	        if value >= 0:
	            nudge, align = 0.025, "left"
	        else:
	            nudge, align = -0.025, "right"
	        ax.text(
	            value + nudge, row, caption,
	            va="center", ha=align,
	            fontsize=11, color=INK, fontweight="bold",
	        )

	    ax.set_yticks(y_pos)
	    ax.set_yticklabels(labels, fontsize=11.5, color=INK)
	    ax.axvline(0, color=INK, linewidth=1)
	    ax.set_xlim(-0.65, 1.18)
	    ax.set_xlabel("contribution to reward", fontsize=10.5, color=SLATE)
	    ax.set_title(
	        "Reward shaping: every modifier the agent can earn or lose",
	        fontsize=14, fontweight="bold", color=INK, pad=12,
	    )
	    for side in ("top", "right"):
	        ax.spines[side].set_visible(False)
	    for side in ("left", "bottom"):
	        ax.spines[side].set_color(GRID)
	    ax.tick_params(axis="x", colors=SLATE)
	    ax.grid(axis="x", color=GRID, linewidth=0.8, alpha=0.6, zorder=0)
	    ax.set_axisbelow(True)

	    # One legend entry per category, using the same colors as the bars.
	    legend_handles = [
	        mpatches.Patch(color=color_map["achieve"], label="achievement (full reward)"),
	        mpatches.Patch(color=color_map["shape"], label="dense shaping signal"),
	        mpatches.Patch(color=color_map["penalty"], label="penalty / decay"),
	    ]
	    ax.legend(handles=legend_handles, loc="lower right", frameon=False, fontsize=10)

	    fig.text(
	        0.5, -0.04,
	        "Final reward is clamped to [0.0, 0.99] before completion (1.0 reserved for "
	        "verified achievement). Hint decay applied last as a multiplier (0.85^n).",
	        fontsize=9.5, color=SLATE, ha="center", style="italic",
	    )

	    _save(fig, "reward_components.png")


	def main() -> None:
	    """Generate all four blog figures, one after another."""
	    renderers = (hero, tier_pyramid, dataset_composition, reward_components)
	    for render in renderers:
	        render()


	if __name__ == "__main__":
	    # Script entry point: render every figure when run directly.
	    main()