Inzimam786's picture
Upload 23 files
fe64236 verified
Raw
History Blame Contribute Delete
5.7 kB
"""
utils.py
====================================
This module contains helper utilities for:
1. ⏱️ Performance measurement (deterministic timing)
2. 🧹 Parsing agent responses (clean JSON extraction)
3. ⚡ Speed comparison (optimization scoring)
4. 📊 Logging performance reports (feedback to agent)
🧠 WHY THIS MATTERS:
--------------------
In our environment, we are not just fixing code —
we are MEASURING how much better it becomes.
This is critical for:
- Reward shaping
- Fair evaluation
- Reproducibility (required by OpenEnv)
"""
import time
import json
import re
from typing import Any, Dict, Callable
# ============================================================
# ⏱️ SPEED MEASUREMENT
# ============================================================
def measure_execution_time(code_str: str, inputs: Dict[str, Any], iterations: int = 5) -> float:
    """
    Measure the best-case wall-clock execution time of a code snippet.

    The snippet is compiled once, then executed ``iterations`` times, each
    time in a fresh copy of ``inputs``. The minimum duration is returned.

    Why the minimum of several runs?
    --------------------------------
    Background processes, OS scheduling and cache effects only ever ADD
    time to a run, so the fastest run is the least noisy estimate.

    Why time.perf_counter() instead of time.time()?
    ------------------------------------------------
    perf_counter() is a monotonic, high-resolution clock intended for
    measuring short durations; time.time() follows the system clock and
    can jump when that clock is adjusted.

    Parameters:
    -----------
    code_str : str
        Python source code to execute.
    inputs : dict
        Names made available to the snippet. The dict is copied for every
        run and is never mutated by this function.
    iterations : int
        Number of timed runs.

    Returns:
    --------
    float
        Minimum execution time in seconds, or float("inf") when the
        snippet does not compile, raises, tries to exit the interpreter,
        or when ``iterations`` is less than 1.
    """
    # SECURITY NOTE: exec() runs the snippet with the full privileges of
    # this process. This function is NOT a sandbox; untrusted code must be
    # isolated by the caller (separate process/container, resource limits).

    # Compile once, outside the timed region, so parsing cost does not
    # pollute the measurement. Broken source counts as a failed run.
    try:
        compiled = compile(code_str, "<agent_code>", "exec")
    except Exception:
        return float("inf")

    best = float("inf")
    for _ in range(iterations):
        # A single dict serves as both globals and locals. With separate
        # dicts, functions and generator expressions defined by the
        # snippet could not see the inputs or each other (class-body-like
        # scoping), so valid code would fail with NameError.
        namespace = dict(inputs)
        start = time.perf_counter()
        try:
            exec(compiled, namespace)
        except (Exception, SystemExit):
            # Any failure (including the snippet calling exit()) is
            # treated as "infinitely slow" instead of killing the harness.
            return float("inf")
        elapsed = time.perf_counter() - start
        if elapsed < best:
            best = elapsed
    return best
# ============================================================
# 🧹 JSON CLEANER
# ============================================================
def parse_agent_response(response_str: str) -> Dict[str, Any]:
    """
    Parse an agent response into a JSON dictionary.

    Problem:
    --------
    LLMs often return JSON wrapped in markdown fences and/or surrounded by
    explanatory prose, for example:

        Here is my answer:
        ```json
        {"key": "value"}
        ```

    Strategy:
    ---------
    1. Strip markdown code-fence markers.
    2. Try to parse the whole remaining text as JSON.
    3. Otherwise scan for the first "{" from which a complete JSON object
       can be decoded and return that object.

    Parameters:
    -----------
    response_str : str
        Raw text returned by the agent.

    Returns:
    --------
    dict
        The decoded JSON object. An empty dict is returned when the input
        is empty, is not a string, or contains no decodable JSON object.
        Valid JSON that is not an object (list, number, string) is never
        returned as-is, so callers can always treat the result as a dict.
    """
    if not response_str or not isinstance(response_str, str):
        return {}

    # Remove markdown code-fence markers (``` and ```json, any case).
    cleaned = re.sub(r"```json|```", "", response_str, flags=re.IGNORECASE).strip()

    # Fast path: the whole payload is JSON.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Slow path: find the first position where a full JSON object starts.
    # raw_decode() stops at the end of the object, so trailing prose or a
    # second object does not break parsing, and stray braces in the prose
    # are skipped simply by moving on to the next "{".
    decoder = json.JSONDecoder()
    start = cleaned.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(cleaned, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = cleaned.find("{", start + 1)

    # Fallback: nothing usable was found.
    return {}
# ============================================================
# ⚡ SPEEDUP CALCULATOR
# ============================================================
def compute_speedup(baseline_time: float, new_time: float) -> float:
    """
    Compute the speedup ratio between two execution times.

    Formula:
        speedup = baseline_time / new_time

    Example:
    --------
    baseline = 2.0s, new = 0.5s  ->  speedup = 4.0 (4x faster)

    Edge Cases (checked in this order):
    -----------------------------------
    - baseline_time == inf (baseline failed) -> 0.0, regardless of
      new_time: without a working baseline no speedup can be claimed.
    - new_time == 0 (too fast to measure)    -> inf
    - new_time == inf (new code failed)      -> 0.0, which falls out of
      the division itself.

    Parameters:
    -----------
    baseline_time : float
        Execution time of the original code, in seconds.
    new_time : float
        Execution time of the candidate code, in seconds.

    Returns:
    --------
    float
        The speedup ratio.
    """
    infinity = float("inf")

    # A failed baseline takes precedence over the zero-time shortcut so a
    # broken baseline can never be reported as an infinite speedup.
    if baseline_time == infinity:
        return 0.0
    if new_time == 0:
        return infinity
    return baseline_time / new_time
# ============================================================
# 📊 PERFORMANCE REPORT FORMATTER
# ============================================================
def format_performance_report(
    baseline_time: float,
    new_time: float,
    speedup: float
) -> str:
    """
    Create a human-readable performance report.

    The report is a four-line string: a header followed by the baseline
    time, the optimized time and the speedup.

    Example Output:
    ---------------
    Performance Report:
    - Baseline Time: 0.523100 sec
    - Optimized Time: 0.051200 sec
    - Speedup: 10.22x faster

    Formatting rules:
    -----------------
    - A time of float("inf") is shown as "Failed".
    - A speedup of float("inf") is shown as "∞ (instant)".
    - If either run failed, the speedup is shown as "N/A (run failed)"
      rather than a misleading "0.00x faster".
    - A speedup below 1 is labelled as slower than the baseline instead
      of "faster", so the feedback never claims an improvement that did
      not happen.

    Parameters:
    -----------
    baseline_time : float
        Execution time of the original code in seconds (inf = failed).
    new_time : float
        Execution time of the optimized code in seconds (inf = failed).
    speedup : float
        Ratio baseline_time / new_time as computed by the caller.

    Returns:
    --------
    str
    """
    infinity = float("inf")

    def _format_time(seconds: float) -> str:
        # Infinity is the sentinel used for a run that failed.
        return "Failed" if seconds == infinity else f"{seconds:.6f} sec"

    baseline_str = _format_time(baseline_time)
    new_str = _format_time(new_time)

    if speedup == infinity:
        speedup_str = "∞ (instant)"
    elif baseline_time == infinity or new_time == infinity:
        speedup_str = "N/A (run failed)"
    elif speedup < 1:
        speedup_str = f"{speedup:.2f}x (slower than baseline)"
    else:
        speedup_str = f"{speedup:.2f}x faster"

    return (
        f"Performance Report:\n"
        f"- Baseline Time: {baseline_str}\n"
        f"- Optimized Time: {new_str}\n"
        f"- Speedup: {speedup_str}"
    )