# aws_rl_env/scripts/generate_blog_figures.py
# Sizzing's picture
# Upload folder using huggingface_hub
# 884f069 verified
"""Generate the 4 new PNG figures embedded in blog.md.

Outputs (idempotent):
    docs/figures/blog_hero.png
    docs/figures/tier_pyramid.png
    docs/figures/dataset_composition.png
    docs/figures/reward_components.png

Run from repo root:
    .venv/bin/python scripts/generate_blog_figures.py
"""
from __future__ import annotations
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import FancyBboxPatch
import numpy as np
# Two levels up from this file's resolved path: the directory that contains
# the folder this script lives in.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Every figure produced by this script is written below docs/figures/.
FIG_DIR = REPO_ROOT / "docs" / "figures"
# Runs at import time; creates missing parents and tolerates an existing directory.
FIG_DIR.mkdir(parents=True, exist_ok=True)
# Accent colour: card outlines, the drift box fill and the penalty bars.
PINK = "#ff4f8b"
# Darker accent: hero kicker / big stat numbers and the drift box outline.
PINK_DARK = "#c81b5a"
# Primary text colour (titles, tick labels, bar annotations).
INK = "#1a1a1a"
# Secondary text colour (captions, footnotes, axis labels).
SLATE = "#525a66"
# Background of the hero banner.
PAPER = "#fff7fa"
# Grid lines and the visible spines of the reward chart.
GRID = "#e8d6df"
# Shared categorical colours; individual figures pick entries by index.
PALETTE = ["#3a86ff", "#8338ec", "#ff006e", "#fb5607", "#ffbe0b"]
def _save(fig: plt.Figure, name: str) -> None:
    """Write *fig* to ``FIG_DIR / name``, close it, and report the path.

    The figure is saved at 160 dpi with a tight bounding box, keeping the
    figure's own face colour as the image background. The printed path is
    relative to ``REPO_ROOT``.
    """
    target = FIG_DIR / name
    render_options = {
        "dpi": 160,
        "bbox_inches": "tight",
        "facecolor": fig.get_facecolor(),
    }
    fig.savefig(target, **render_options)
    # Close explicitly so figures do not pile up in pyplot's registry.
    plt.close(fig)
    shown_path = target.relative_to(REPO_ROOT)
    print(f"wrote {shown_path}")
def hero() -> None:
    """Render the hero banner and save it as ``blog_hero.png``.

    Layout, on a 12 x 5.2 canvas with the axes hidden: three headline lines
    anchored at the left margin, a row of four rounded stat cards along the
    bottom, and a small right-aligned event tag in the top corner.
    """
    fig, ax = plt.subplots(figsize=(12, 5.2))
    for surface in (fig.patch, ax):
        surface.set_facecolor(PAPER)
    # Data coordinates mirror the figure size so positions read like inches.
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 5.2)
    ax.axis("off")

    left_margin = 0.45
    typeface = "DejaVu Sans"

    # (baseline y, text, styling) for the three stacked headline lines.
    headlines = (
        (4.55, "AWS Cloud Operations RL",
         {"fontsize": 15, "color": PINK_DARK, "fontweight": "bold"}),
        (3.85, "From Cloud Chaos to Capable Agents",
         {"fontsize": 30, "color": INK, "fontweight": "bold"}),
        (3.25, "Training an LLM SRE on 120+ AWS Tasks with SFT \u2192 GRPO",
         {"fontsize": 15, "color": SLATE, "style": "italic"}),
    )
    for baseline, line, styling in headlines:
        ax.text(left_margin, baseline, line, **styling, family=typeface)

    # (large figure, caption) shown on each card, left to right.
    cards = [
        ("120+", "AWS tasks\n5 tiers + drift"),
        ("8\u00d7", "parallel rollouts\n1 GPU"),
        ("8", "anti-hacking\nlayers"),
        ("39\u219289%", "exact-match\npost-SFT"),
    ]
    card_w = 2.55
    card_gap = 0.2
    card_y = 0.55
    card_h = 2.1
    for slot, (figure_text, caption) in enumerate(cards):
        card_x = left_margin + slot * (card_w + card_gap)
        ax.add_patch(
            FancyBboxPatch(
                (card_x, card_y), card_w, card_h,
                boxstyle="round,pad=0.04,rounding_size=0.18",
                linewidth=1.5, edgecolor=PINK, facecolor="white",
            )
        )
        centre_x = card_x + card_w / 2
        # Large figure sits in the upper part of the card, caption below it.
        ax.text(centre_x, card_y + card_h * 0.62, figure_text,
                fontsize=26, color=PINK_DARK, fontweight="bold",
                ha="center", va="center")
        ax.text(centre_x, card_y + card_h * 0.22, caption,
                fontsize=10.5, color=SLATE, ha="center", va="center")

    # Event tag, right-aligned against the right margin.
    ax.text(11.55, 4.55, "OpenEnv Hackathon \u2022 Apr 2026",
            fontsize=10, color=SLATE, ha="right", style="italic")
    _save(fig, "blog_hero.png")
def tier_pyramid() -> None:
    """Render the curriculum pyramid and save it as ``tier_pyramid.png``.

    The left panel stacks one trapezoid per tier, narrowest at the top and
    widest at the bottom, each labelled with its name and a one-line
    summary. The right panel is a sidebar with a "Drift" box and a short
    note on promotion paths.
    """
    # Listed apex-first. Each row:
    # (tier name, task count, chaos label, strategy label, fill colour).
    curriculum = [
        ("Expert", 24, "30%", "state_checks", PALETTE[2]),
        ("Advanced", 25, "30%", "multi_step+services", PALETTE[1]),
        ("Intermediate", 25, "20%", "multi_step", PALETTE[0]),
        ("Beginner", 25, "10%", "resource_creation", "#06b6d4"),
        ("Warmup", 25, "10%", "command_match", "#22c55e"),
    ]
    n = len(curriculum)

    fig, (pyramid_ax, side_ax) = plt.subplots(
        1, 2, figsize=(14, 6), gridspec_kw={"width_ratios": [3.2, 1]}
    )
    fig.patch.set_facecolor("white")

    pyramid_ax.set_xlim(-1.15, 1.15)
    pyramid_ax.set_ylim(-0.2, n + 0.4)
    pyramid_ax.axis("off")
    pyramid_ax.set_title("Curriculum: 124 tasks across 5 tiers", fontsize=15,
                         fontweight="bold", color=INK, pad=12)

    def half_width(edge: int) -> float:
        # Half-width of the horizontal edge `edge` steps below the apex;
        # grows linearly, so every layer is wider at its bottom than its top.
        return 0.45 + 0.55 * (edge / (n - 1))

    for rank, (tier, task_count, chaos, strategy, fill) in enumerate(curriculum):
        # rank 0 is the apex layer; rank n-1 is the base layer.
        upper_y = n - rank
        lower_y = n - rank - 1
        upper_half = half_width(rank)
        lower_half = half_width(rank + 1)
        corners = [
            (-lower_half, lower_y), (lower_half, lower_y),
            (upper_half, upper_y), (-upper_half, upper_y),
        ]
        pyramid_ax.add_patch(
            mpatches.Polygon(
                corners,
                closed=True, facecolor=fill, edgecolor="white",
                linewidth=2, alpha=0.95,
            )
        )
        centre_y = (upper_y + lower_y) / 2
        # Tier name slightly above the layer's centre, summary slightly below.
        pyramid_ax.text(0, centre_y + 0.18, tier, fontsize=14, fontweight="bold",
                        color="white", ha="center", va="center")
        pyramid_ax.text(0, centre_y - 0.18,
                        f"{task_count} tasks \u00b7 chaos {chaos} \u00b7 {strategy}",
                        fontsize=9.5, color="white", ha="center", va="center",
                        alpha=0.97)

    # Sidebar: shares the pyramid's upper y-limit so the two panels line up.
    side_ax.set_xlim(0, 1)
    side_ax.set_ylim(0, n + 0.4)
    side_ax.axis("off")
    side_ax.set_title("Adversarial track", fontsize=13, fontweight="bold",
                      color=INK, pad=12)
    side_ax.add_patch(
        FancyBboxPatch(
            (0.08, 1.7), 0.84, 1.7,
            boxstyle="round,pad=0.04,rounding_size=0.10",
            facecolor=PINK, edgecolor=PINK_DARK, linewidth=2, alpha=0.92,
        )
    )
    side_ax.text(0.5, 3.0, "Drift", fontsize=20, fontweight="bold",
                 color="white", ha="center")
    side_ax.text(0.5, 2.6, "9 tasks", fontsize=12, color="white", ha="center")
    side_ax.text(0.5, 2.05, "2\u20133 mutations\nrandomized\nper episode",
                 fontsize=9.5, color="white", ha="center", va="center")
    side_ax.text(0.5, 0.85,
                 "Promotion paths\n\u2014\nstandard: min episodes + rate\nfast-track: 3 consecutive \u22650.9",
                 fontsize=9, color=SLATE, ha="center", va="center")
    _save(fig, "tier_pyramid.png")
def dataset_composition() -> None:
    """Render two donut charts and save them as ``dataset_composition.png``.

    The left donut shows the share of each trajectory type, the right one
    the share of each tier. Each donut has a legend to its right listing
    "label — percent" and a short label in its hole; a footnote under the
    figure explains why the expert tier is absent.
    """
    trajectory_names = ["success", "continuation", "failure recovery",
                        "verification", "hint usage"]
    trajectory_shares = [55, 20, 15, 5, 5]
    # The expert tier is left out on purpose: a 0% wedge cannot be drawn
    # meaningfully on a donut, so it is covered by the footnote instead.
    tier_names = ["warmup", "beginner", "intermediate", "advanced"]
    tier_shares = [50, 30, 15, 5]

    fig, axes = plt.subplots(1, 2, figsize=(15.5, 6))
    fig.patch.set_facecolor("white")
    fig.suptitle("SFT dataset composition \u2022 1,500 rows",
                 fontsize=16, fontweight="bold", color=INK, y=1.02)
    # Wide horizontal gap leaves room for each donut's outside legend.
    fig.subplots_adjust(wspace=0.7, left=0.04, right=0.96)

    def draw_ring(target_ax, shares, names, heading, palette, hole_text):
        # With labels=None the pie call returns (wedges, texts).
        wedges, _unused_texts = target_ax.pie(
            shares, labels=None, colors=palette,
            wedgeprops={"width": 0.42, "edgecolor": "white", "linewidth": 2},
            startangle=90,
        )
        target_ax.set_title(heading, fontsize=13, fontweight="bold",
                            color=INK, pad=10)
        entries = []
        for label, share in zip(names, shares):
            entries.append(f"{label} \u2014 {share}%")
        target_ax.legend(wedges, entries, loc="center left",
                         bbox_to_anchor=(1.05, 0.5), frameon=False, fontsize=11)
        target_ax.text(0, 0, hole_text, fontsize=14, fontweight="bold",
                       color=INK, ha="center", va="center")

    draw_ring(
        axes[0], trajectory_shares, trajectory_names, "Trajectory types",
        ["#22c55e", "#3a86ff", "#fb5607", "#8338ec", "#ffbe0b"],
        "5 types",
    )
    draw_ring(
        axes[1], tier_shares, tier_names, "Tier weights",
        ["#22c55e", "#06b6d4", PALETTE[0], PALETTE[1]],
        "4 tiers\n+ expert*",
    )

    footnote = (
        "* expert tasks excluded from SFT (randomized state checks \u2192 no canonical script). "
        "GRPO handles them via live reward signal."
    )
    fig.text(0.5, -0.04, footnote,
             fontsize=10, color=SLATE, ha="center", style="italic")
    _save(fig, "dataset_composition.png")
def reward_components() -> None:
    """Render the reward-modifier bar chart as ``reward_components.png``.

    Each modifier is one horizontal bar: "+" rows extend right of zero,
    "-" rows extend left. Bars are coloured by category, annotated with
    their signed magnitude, and listed top-to-bottom in table order.
    """
    # (label, magnitude, sign, category)
    modifiers = [
        ("task achieved", 1.00, "+", "achieve"),
        ("chaos survival", 0.05, "+", "achieve"),
        ("partial progress", 0.80, "+", "shape"),
        ("progress delta", 0.10, "+", "shape"),
        ("idempotent retry", 0.02, "+", "shape"),
        ("rollback (per pair)", 0.10, "-", "penalty"),
        ("command failed", 0.50, "-", "penalty"),
        ("hint decay (n=3)", 0.39, "-", "penalty"),
    ]
    category_color = {
        "achieve": "#22c55e",
        "shape": PALETTE[0],
        "penalty": PINK,
    }

    names = [label for label, _, _, _ in modifiers]
    bar_lengths = [
        magnitude if sign == "+" else -magnitude
        for _, magnitude, sign, _ in modifiers
    ]
    bar_colors = [category_color[category] for _, _, _, category in modifiers]
    annotations = [f"{sign}{magnitude:.2f}" for _, magnitude, sign, _ in modifiers]

    fig, ax = plt.subplots(figsize=(11.5, 5.8))
    fig.patch.set_facecolor("white")

    # Reversed positions put the first table row at the top of the chart.
    rows_y = np.arange(len(names))[::-1]
    ax.barh(rows_y, bar_lengths, color=bar_colors, edgecolor="white",
            linewidth=1.5, height=0.72, alpha=0.92)

    # Annotate just past the tip of each bar, on the side it points to.
    for row_y, length, note in zip(rows_y, bar_lengths, annotations):
        if length >= 0:
            nudge, align = 0.025, "left"
        else:
            nudge, align = -0.025, "right"
        ax.text(length + nudge, row_y, note, va="center", ha=align,
                fontsize=11, color=INK, fontweight="bold")

    ax.set_yticks(rows_y)
    ax.set_yticklabels(names, fontsize=11.5, color=INK)
    ax.axvline(0, color=INK, linewidth=1)
    ax.set_xlim(-0.65, 1.18)
    ax.set_xlabel("contribution to reward", fontsize=10.5, color=SLATE)
    ax.set_title("Reward shaping: every modifier the agent can earn or lose",
                 fontsize=14, fontweight="bold", color=INK, pad=12)

    # Open frame: drop the top/right spines, soften the other two.
    for hidden in ("top", "right"):
        ax.spines[hidden].set_visible(False)
    for muted in ("left", "bottom"):
        ax.spines[muted].set_color(GRID)
    ax.tick_params(axis="x", colors=SLATE)
    ax.grid(axis="x", color=GRID, linewidth=0.8, alpha=0.6, zorder=0)
    ax.set_axisbelow(True)

    legend_entries = (
        ("achieve", "achievement (full reward)"),
        ("shape", "dense shaping signal"),
        ("penalty", "penalty / decay"),
    )
    swatches = [
        mpatches.Patch(color=category_color[category], label=caption)
        for category, caption in legend_entries
    ]
    ax.legend(handles=swatches, loc="lower right", frameon=False, fontsize=10)

    footnote = (
        "Final reward is clamped to [0.0, 0.99] before completion (1.0 reserved for "
        "verified achievement). Hint decay applied last as a multiplier (0.85^n)."
    )
    fig.text(0.5, -0.04, footnote,
             fontsize=9.5, color=SLATE, ha="center", style="italic")
    _save(fig, "reward_components.png")
def main() -> None:
    """Render every blog figure, one after another, in a fixed order."""
    renderers = (hero, tier_pyramid, dataset_composition, reward_components)
    for render in renderers:
        render()


# Only generate figures when executed as a script, not when imported.
if __name__ == "__main__":
    main()