Spaces:

printf-sourav
/

DevOps_Debugger

Sleeping

App Files Files Community

DevOps_Debugger / scripts /demo.py

printf-sourav

Initial commit

27cdb3e 2 months ago

Raw

History Blame Contribute Delete

12 kB

	#!/usr/bin/env python3
	"""
	Demo Script — Before/After training comparison.

	Runs the DevOps RL agent on scenarios before and after training,
	showing the command sequences side by side. This is the primary
	demo output for judges.

	Usage:
	python scripts/demo.py
	python scripts/demo.py --episodes 100
	python scripts/demo.py --episodes 500 --scenario missing_flask
	"""

	from __future__ import annotations

	import argparse
	import sys
	import os

	# Add the project root (the parent of this script's directory) to the front
	# of sys.path so modules located there can be imported when this file is run
	# directly as a script.
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from rich.console import Console
	from rich.panel import Panel
	from rich.table import Table

	from agent.baseline_agent import BaselineAgent
	from devops_env.env import DevOpsEnv
	from replay.buffer import ReplayBuffer
	from scenarios.registry import ScenarioRegistry
	from training.curriculum import CurriculumScheduler

	# Module-level Rich console shared by the functions below for styled output.
	console = Console()


	class UntrainedAgent:
	    """Simulates a naive untrained agent that makes bad decisions.

	    Deliberately issues suboptimal commands to show the contrast
	    with the trained baseline/LLM agent.

	    The step counter is set once in ``__init__`` and only ever incremented,
	    so an instance replays its script exactly once; create a fresh instance
	    for every episode.
	    """

	    # error_type -> (commands for steps 1..n, command returned on every later step)
	    _SCRIPTS = {
	        "missing_package": (
	            (
	                "python /app/server.py",   # Bad: run first without fixing anything
	                "sudo pip install flask",  # Will be blocked
	                "apt install python",      # Wrong fix
	            ),
	            "echo 'I give up'",
	        ),
	        "port_conflict": (
	            (
	                "python /app/server.py",
	                "python /app/server.py",   # Repeat
	            ),
	            "echo 'stuck'",
	        ),
	        "missing_env": (
	            (
	                "python /app/db_app.py",
	                "python /app/db_app.py",   # Repeat
	            ),
	            "echo 'no idea'",
	        ),
	    }

	    # Returned for any error_type that has no script above.
	    _UNKNOWN_COMMAND = "echo 'unknown error'"

	    def __init__(self) -> None:
	        # Number of act() calls made so far on this instance.
	        self._step = 0

	    def act(self, observation: dict) -> str:
	        """Generate a deliberately poor command sequence.

	        Args:
	            observation: Mapping read via ``.get("error_type", "unknown")``;
	                no other key is used.

	        Returns:
	            The scripted command for the current step of the matching
	            error type, that script's fallback once the script is
	            exhausted, or ``"echo 'unknown error'"`` for an error type
	            without a script.
	        """
	        self._step += 1
	        script = self._SCRIPTS.get(observation.get("error_type", "unknown"))
	        if script is None:
	            return self._UNKNOWN_COMMAND

	        commands, fallback = script
	        # _step is 1-based, the tuple is 0-based.
	        if self._step <= len(commands):
	            return commands[self._step - 1]
	        return fallback


	def run_episode(agent, scenario_id: str, registry: ScenarioRegistry) -> dict:
	    """Run a single episode and return the results.

	    Args:
	        agent: Any agent with an act(observation) -> str method.
	        scenario_id: ID of the scenario to run.
	        registry: Scenario registry.

	    Returns:
	        Dict with the keys ``scenario_id``, ``initial_error`` (the
	        ``description`` entry of the reset info, or ``""``), ``steps``
	        (one dict per step with ``step``, ``action``, ``status``,
	        ``reward``, ``solved`` and ``blocked``), ``total_reward``,
	        ``solved`` (taken from the environment's episode summary) and
	        ``total_steps``.
	    """
	    env = DevOpsEnv(
	        scenario_registry=registry,
	        target_scenario=scenario_id,
	        max_steps=10,
	    )

	    steps = []
	    total_reward = 0.0

	    # try/finally guarantees the environment is closed even when the agent,
	    # a step, or the summary call raises.
	    try:
	        obs, info = env.reset()
	        done = False

	        while not done:
	            action = agent.act(obs)
	            obs, reward, terminated, truncated, step_info = env.step(action)
	            total_reward += reward

	            # Read the execution result once; a missing or None entry is
	            # treated as "no details available".
	            execution = step_info.get("execution_result") or {}
	            exit_code = execution.get("exit_code", -1)
	            blocked = execution.get("blocked", False)
	            solved = step_info.get("solved", False)

	            # Determine status string (blocked wins over solved, solved
	            # wins over the raw exit code).
	            if blocked:
	                status = "DANGEROUS COMMAND BLOCKED"
	            elif solved:
	                status = "success"
	            elif exit_code == 0:
	                status = "ok (exit 0)"
	            else:
	                status = f"failed (exit {exit_code})"

	            steps.append({
	                "step": len(steps) + 1,
	                "action": action,
	                "status": status,
	                "reward": reward,
	                "solved": solved,
	                "blocked": blocked,
	            })
	            done = terminated or truncated

	        summary = env.get_episode_summary()
	    finally:
	        env.close()

	    return {
	        "scenario_id": scenario_id,
	        "initial_error": info.get("description", ""),
	        "steps": steps,
	        "total_reward": total_reward,
	        "solved": summary["solved"],
	        "total_steps": len(steps),
	    }


	def print_episode_plain(title: str, result: dict) -> None:
	    """Write a plain-text episode report to stdout.

	    The report has a ``=== title ===`` header, an ``Error:`` line, one
	    line per step and a final ``Result:`` line.

	    Args:
	        title: Text placed inside the header.
	        result: Episode dict; the keys read are ``scenario_id``,
	            ``initial_error``, ``steps`` (each with ``step``, ``action``
	            and ``status``), ``solved``, ``total_reward`` and, for solved
	            episodes only, ``total_steps``.
	    """
	    # Canned one-line error summaries for the known scenario ids; any other
	    # id falls back to the episode's own initial_error text.
	    known_errors = {
	        "missing_flask": "ModuleNotFoundError: flask",
	        "missing_numpy": "ModuleNotFoundError: numpy",
	        "missing_requests": "ModuleNotFoundError: requests",
	        "wrong_python_version": "SyntaxError: invalid syntax (python2)",
	        "port_conflict": "OSError: Address already in use (port 5000)",
	        "missing_env_var": "KeyError: 'DATABASE_URL'",
	        "broken_requirements": "ERROR: ResolutionImpossible",
	    }
	    fallback_error = result["initial_error"]
	    error = known_errors.get(result["scenario_id"], fallback_error)

	    print(f"\n=== {title} ===")
	    print(f"Error: {error}")

	    for entry in result["steps"]:
	        command = entry["action"]
	        # Commands longer than the 35-character column are cut to 32
	        # characters plus an ellipsis so the arrows stay aligned.
	        action_short = command if len(command) <= 35 else command[:32] + "..."
	        print(f"Step {entry['step']}: {action_short:<35s} → {entry['status']}")

	    if result["solved"]:
	        solved_str = "SOLVED"
	        steps_info = f"in {result['total_steps']} steps "
	    else:
	        solved_str = "FAILED"
	        steps_info = ""
	    print(f"Result: {solved_str} {steps_info}(reward: {result['total_reward']:+.1f})")


	def display_episode_rich(title: str, result: dict, style: str) -> None:
	    """Display an episode result in a formatted Rich panel.

	    Args:
	        title: Panel title (rendered bold).
	        result: Episode dict; the keys read are ``scenario_id``, ``steps``
	            (each with ``step``, ``action``, ``status``, ``reward``,
	            ``solved`` and ``blocked``), ``solved``, ``total_reward`` and,
	            for solved episodes only, ``total_steps``.
	        style: Value passed to the panel as ``border_style``.
	    """
	    # Function-scope import keeps this change self-contained; rich is
	    # already imported at the top of this file.
	    from rich.markup import escape

	    # The scenario id and the agent's commands are free-form text. Escape
	    # them so square brackets inside them are printed literally instead of
	    # being parsed as Rich markup tags.
	    lines = [f"Scenario: [bold]{escape(str(result['scenario_id']))}[/bold]", ""]

	    for step in result["steps"]:
	        if step["blocked"]:
	            status = "[red]⚠ BLOCKED[/red]"
	        elif step["solved"]:
	            status = "[green]✓ SOLVED[/green]"
	        elif "failed" in step["status"]:
	            status = f"[red]✗ {step['status']}[/red]"
	        else:
	            status = f"[yellow]{step['status']}[/yellow]"

	        lines.append(f" Step {step['step']}: [cyan]{escape(str(step['action']))}[/cyan]")
	        lines.append(f" → {status} (reward={step['reward']:+.1f})")

	    lines.append("")
	    if result["solved"]:
	        lines.append(f"[green bold]SOLVED ✓ in {result['total_steps']} steps[/green bold]")
	    else:
	        lines.append("[red bold]FAILED ✗[/red bold]")
	    lines.append(f"Total Reward: [bold]{result['total_reward']:+.1f}[/bold]")

	    console.print(Panel("\n".join(lines), title=f"[bold]{title}[/bold]",
	                        border_style=style, padding=(1, 2)))


	def run_training_batch(num_episodes: int, registry: ScenarioRegistry,
	                       replay_buffer: ReplayBuffer) -> None:
	    """Run training episodes with the baseline agent.

	    For each episode a level is sampled from a new CurriculumScheduler, a
	    random scenario of that level is taken from the registry and run with
	    a new BaselineAgent; the result is stored in the replay buffer and
	    reported back to the curriculum. A progress line is printed after
	    every 20th episode.

	    Args:
	        num_episodes: Number of episodes to run.
	        registry: Scenario registry used to pick and run scenarios.
	        replay_buffer: Buffer that receives every finished episode.
	    """
	    agent = BaselineAgent()
	    curriculum = CurriculumScheduler()

	    console.print(f"\n[bold cyan]Running {num_episodes} training episodes...[/bold cyan]\n")

	    solved_count = 0
	    for i in range(num_episodes):
	        level = curriculum.sample_level()
	        scenario = registry.get_random(level=level)
	        result = run_episode(agent, scenario.id, registry)

	        replay_buffer.store_episode(
	            scenario_id=result["scenario_id"],
	            level=scenario.level,
	            steps=result["steps"],
	            total_reward=result["total_reward"],
	            solved=result["solved"],
	            training_episode=i + 1,
	        )

	        if result["solved"]:
	            solved_count += 1

	        # Record in curriculum for window tracking
	        curriculum.record_episode(level=scenario.level, solved=result["solved"])

	        # Progress bar every 20 episodes
	        if (i + 1) % 20 == 0:
	            rate = solved_count / (i + 1) * 100
	            # 20-cell bar: one filled cell per 5 percentage points.
	            filled = int(rate / 5)
	            bar = "█" * filled + "░" * (20 - filled)
	            levels = curriculum.get_active_levels()
	            # Plain "|" separators: the previous "\|" was an invalid escape
	            # sequence and printed a stray backslash before each pipe.
	            console.print(
	                f" Episode {i+1:>4d}/{num_episodes} | "
	                f"Solve rate: {rate:5.1f}% [{bar}] | "
	                f"Levels: {levels}"
	            )


	def _print_phase_banner(heading_markup: str) -> None:
	    """Print a phase heading framed by two 60-character rules."""
	    console.print("\n" + "═" * 60)
	    console.print(heading_markup)
	    console.print("═" * 60)


	def _solve_rate_color(solve_rate: float) -> str:
	    """Map a solve rate to a colour: green above 0.8, yellow above 0.5, else red."""
	    if solve_rate > 0.8:
	        return "green"
	    if solve_rate > 0.5:
	        return "yellow"
	    return "red"


	def main():
	    """Run the before/after training demo.

	    Parses ``--episodes`` (default 100) and ``--scenario`` (default
	    ``missing_flask``), then runs one episode with the UntrainedAgent, a
	    batch of training episodes, one episode with a BaselineAgent, and
	    finally prints per-level and per-scenario statistics taken from the
	    replay buffer.
	    """
	    parser = argparse.ArgumentParser(description="DevOps RL Agent — Before/After Demo")
	    parser.add_argument("--episodes", type=int, default=100, help="Training episodes to run")
	    parser.add_argument("--scenario", type=str, default="missing_flask", help="Demo scenario ID")
	    args = parser.parse_args()

	    console.print(Panel(
	        "[bold]DevOps RL Agent — Before/After Training Demo[/bold]\n\n"
	        "Shows how the RL agent improves at fixing broken\n"
	        "Linux/Python environments through reinforcement learning.\n\n"
	        "[dim]This is the output judges see first.[/dim]",
	        title="🤖 AI DevOps Agent",
	        border_style="bright_magenta",
	        padding=(1, 4),
	    ))

	    registry = ScenarioRegistry()
	    registry.register_defaults()
	    db_url = "sqlite:///demo_replay.db"
	    replay_buffer = ReplayBuffer(db_url)

	    # ────────── BEFORE TRAINING ──────────
	    _print_phase_banner("[bold red] PHASE 1: BEFORE TRAINING[/bold red]")

	    untrained = UntrainedAgent()
	    before_result = run_episode(untrained, args.scenario, registry)
	    print_episode_plain("BEFORE TRAINING (episode 0)", before_result)
	    display_episode_rich("Before Training", before_result, style="red")

	    # ────────── TRAINING ──────────
	    _print_phase_banner("[bold yellow] PHASE 2: TRAINING[/bold yellow]")

	    run_training_batch(args.episodes, registry, replay_buffer)

	    # ────────── AFTER TRAINING ──────────
	    _print_phase_banner("[bold green] PHASE 3: AFTER TRAINING[/bold green]")

	    # NOTE(review): run_training_batch builds its own BaselineAgent and
	    # returns nothing, so the agent created here is a new instance, not the
	    # one that ran the training episodes. Whether BaselineAgent picks up
	    # anything from the training run is not visible in this file — confirm.
	    trained = BaselineAgent()
	    after_result = run_episode(trained, args.scenario, registry)
	    print_episode_plain(f"AFTER TRAINING (episode {args.episodes})", after_result)
	    display_episode_rich("After Training", after_result, style="green")

	    # ────────── STATISTICS ──────────
	    _print_phase_banner("[bold cyan] TRAINING STATISTICS[/bold cyan]")

	    stats = replay_buffer.get_stats()

	    table = Table(title="Performance by Level")
	    table.add_column("Level", style="bold")
	    table.add_column("Episodes", justify="right")
	    table.add_column("Solve Rate", justify="right")
	    table.add_column("Mean Reward", justify="right")
	    table.add_column("Mean Steps", justify="right")

	    level_stats = stats.get("levels", {})
	    for level in [1, 2, 3]:
	        # Only levels that are present and have at least one episode get a row.
	        if level not in level_stats:
	            continue
	        ls = level_stats[level]
	        if ls["count"] > 0:
	            c = _solve_rate_color(ls["solve_rate"])
	            table.add_row(
	                f"Level {level}",
	                str(ls["count"]),
	                f"[{c}]{ls['solve_rate']:.1%}[/{c}]",
	                f"{ls['mean_reward']:.1f}",
	                f"{ls['mean_steps']:.1f}",
	            )

	    console.print(table)

	    # Scenario breakdown
	    if "scenarios" in stats:
	        sc_table = Table(title="Performance by Scenario")
	        sc_table.add_column("Scenario", style="bold")
	        sc_table.add_column("Attempts", justify="right")
	        sc_table.add_column("Solve Rate", justify="right")

	        for sid, sc_stats in sorted(stats["scenarios"].items()):
	            c = _solve_rate_color(sc_stats["solve_rate"])
	            sc_table.add_row(sid, str(sc_stats["count"]),
	                             f"[{c}]{sc_stats['solve_rate']:.1%}[/{c}]")
	        console.print(sc_table)

	    console.print("\n[bold green]Demo complete! ✓[/bold green]\n")


	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	main()