""" utils.py ==================================== This module contains helper utilities for: 1. โฑ๏ธ Performance measurement (deterministic timing) 2. ๐Ÿงน Parsing agent responses (clean JSON extraction) 3. โšก Speed comparison (optimization scoring) 4. ๐Ÿ“Š Logging performance reports (feedback to agent) ๐Ÿง  WHY THIS MATTERS: -------------------- In our environment, we are not just fixing code โ€” we are MEASURING how much better it becomes. This is critical for: - Reward shaping - Fair evaluation - Reproducibility (required by OpenEnv) """ import time import json import re from typing import Any, Dict, Callable # ============================================================ # โฑ๏ธ SPEED MEASUREMENT # ============================================================ def measure_execution_time(code_str: str, inputs: Dict[str, Any], iterations: int = 5) -> float: """ Measure execution time of code deterministically. Why multiple iterations? ------------------------- CPU performance fluctuates due to: - Background processes - OS scheduling - Cache effects To reduce noise, we: - Run the code multiple times - Take the MINIMUM time (best-case performance) Why time.perf_counter() instead of time.time()? ------------------------------------------------ time.time(): โŒ Lower precision โŒ Affected by system clock changes time.perf_counter(): โœ… High precision (nanosecond-level) โœ… Monotonic (never goes backward) โœ… Best for benchmarking Parameters: ----------- code_str : str Python code to execute inputs : dict Variables passed into execution environment iterations : int Number of times to run the code Returns: -------- float โ†’ minimum execution time (seconds) """ exec_times = [] for _ in range(iterations): # Create isolated execution environment local_env = dict(inputs) start = time.perf_counter() try: exec(code_str, {}, local_env) except Exception: # If code fails, treat as very slow return float("inf") end = time.perf_counter() exec_times.append(end - start) return min(exec_times) if exec_times else float("inf") # ============================================================ # ๐Ÿงน JSON CLEANER # ============================================================ def parse_agent_response(response_str: str) -> Dict[str, Any]: """ Parse agent response into a clean JSON dictionary. Problem: -------- LLMs often return JSON wrapped in markdown like: ```json { "key": "value" } ``` This function: - Removes markdown wrappers - Extracts JSON safely - Returns Python dictionary Parameters: ----------- response_str : str Returns: -------- dict """ if not response_str: return {} # Remove markdown code blocks cleaned = re.sub(r"```json|```", "", response_str, flags=re.IGNORECASE).strip() # Try direct JSON parsing try: return json.loads(cleaned) except json.JSONDecodeError: pass # Try to extract JSON substring match = re.search(r"\{.*\}", cleaned, re.DOTALL) if match: try: return json.loads(match.group()) except json.JSONDecodeError: pass # Fallback return {} # ============================================================ # โšก SPEEDUP CALCULATOR # ============================================================ def compute_speedup(baseline_time: float, new_time: float) -> float: """ Compute speedup ratio between two execution times. Formula: speedup = baseline_time / new_time Examples: --------- baseline = 1.0s new = 0.1s โ†’ speedup = 10.0 (10x faster) Edge Cases: ----------- - If new_time == 0 โ†’ return large number - If baseline_time == inf โ†’ return 0 Returns: -------- float โ†’ speedup ratio """ if new_time == 0: return float("inf") if baseline_time == float("inf"): return 0.0 return baseline_time / new_time # ============================================================ # ๐Ÿ“Š PERFORMANCE REPORT FORMATTER # ============================================================ def format_performance_report( baseline_time: float, new_time: float, speedup: float ) -> str: """ Create a human-readable performance report. This gets fed back to the agent as part of Observation. Why? ---- Agents learn better with structured feedback. Example Output: --------------- Baseline Time: 0.5231 sec Optimized Time: 0.0512 sec Speedup: 10.21x faster Returns: -------- str """ if baseline_time == float("inf"): baseline_str = "Failed" else: baseline_str = f"{baseline_time:.6f} sec" if new_time == float("inf"): new_str = "Failed" else: new_str = f"{new_time:.6f} sec" if speedup == float("inf"): speedup_str = "โˆž (instant)" else: speedup_str = f"{speedup:.2f}x faster" report = ( f"Performance Report:\n" f"- Baseline Time: {baseline_str}\n" f"- Optimized Time: {new_str}\n" f"- Speedup: {speedup_str}" ) return report