Spaces:

Inzimam786
/

python-debugger-env

Runtime error

App Files Files Community

python-debugger-env / pydebug_optimizer /utils.py

Inzimam786

Upload 23 files

fe64236 verified 2 months ago

Raw

History Blame Contribute Delete

5.7 kB

	"""
	utils.py
	====================================

	This module contains helper utilities for:

	1. ⏱️ Performance measurement (deterministic timing)
	2. 🧹 Parsing agent responses (clean JSON extraction)
	3. ⚡ Speed comparison (optimization scoring)
	4. 📊 Logging performance reports (feedback to agent)

	🧠 WHY THIS MATTERS:
	--------------------
	In our environment, we are not just fixing code —
	we are MEASURING how much better it becomes.

	This is critical for:
	- Reward shaping
	- Fair evaluation
	- Reproducibility (required by OpenEnv)

	"""

	import time
	import json
	import re
	from typing import Any, Dict, Callable


	# ============================================================
	# ⏱️ SPEED MEASUREMENT
	# ============================================================

	def measure_execution_time(code_str: str, inputs: Dict[str, Any], iterations: int = 5) -> float:
	    """
	    Measure the best-case wall-clock time of a Python snippet.

	    The snippet is compiled once and then executed ``iterations`` times,
	    each time in a fresh namespace seeded with a copy of ``inputs``. The
	    MINIMUM elapsed time is returned, because the minimum is the sample
	    least disturbed by background processes, OS scheduling and cache
	    effects.

	    ``time.perf_counter()`` is used instead of ``time.time()`` because it
	    is monotonic and offers the highest available resolution.

	    Implementation notes
	    --------------------
	    * The snippet runs with ONE dict acting as both globals and locals.
	      With separate dicts (``exec(code, {}, env)``) the code behaves like
	      a class body: functions it defines cannot see each other, cannot
	      call themselves recursively, and cannot see the inputs.
	    * ``inputs`` is deep-copied for every iteration so that in-place
	      mutation (for example ``data.sort()``) neither leaks into later
	      iterations nor modifies the caller's objects. If an input cannot be
	      deep-copied, a shallow copy is used as a best effort.
	    * Compilation happens outside the timed region, so only execution is
	      measured.

	    Parameters
	    ----------
	    code_str : str
	        Python source code to execute.
	    inputs : dict
	        Names made available to the snippet. ``None`` is treated as empty.
	    iterations : int
	        Number of timed runs. Values <= 0 yield ``float("inf")``.

	    Returns
	    -------
	    float
	        Minimum execution time in seconds, or ``float("inf")`` when the
	        snippet fails to compile, raises, calls ``sys.exit()``, or is
	        never run.
	    """
	    import copy  # local import: only this function needs it

	    # SECURITY NOTE: exec() runs arbitrary code with full interpreter
	    # privileges. Nothing here sandboxes it; callers must only pass code
	    # they are prepared to run in this process.

	    # Compile once so parsing cost is not part of any timing sample.
	    try:
	        compiled = compile(code_str, "<string>", "exec")
	    except Exception:
	        # Syntax errors (or a non-string argument) count as a failed run.
	        return float("inf")

	    best = float("inf")

	    for _ in range(iterations):
	        # Fresh namespace per run; deep copy keeps runs independent.
	        try:
	            namespace = copy.deepcopy(dict(inputs or {}))
	        except Exception:
	            namespace = dict(inputs or {})

	        start = time.perf_counter()
	        try:
	            # A single dict serves as globals AND locals (see docstring).
	            exec(compiled, namespace)
	        except (Exception, SystemExit):
	            # SystemExit is caught so that exit()/sys.exit() inside the
	            # snippet cannot terminate the host process.
	            return float("inf")
	        elapsed = time.perf_counter() - start

	        if elapsed < best:
	            best = elapsed

	    return best


	# ============================================================
	# 🧹 JSON CLEANER
	# ============================================================

	def parse_agent_response(response_str: str) -> Dict[str, Any]:
	    """
	    Parse an agent (LLM) response into a JSON object (Python dict).

	    LLMs frequently wrap JSON in markdown fences or surround it with
	    prose, for example:

	        ```json
	        {"key": "value"}
	        ```

	    Strategy
	    --------
	    1. Strip markdown code fences (three backticks, optionally followed
	       by "json", case-insensitive).
	    2. Try to parse the whole remaining text as JSON.
	    3. Otherwise, try to decode a JSON object starting at each "{" in
	       turn and return the first one that decodes. Text after the object
	       (even text containing braces) does not break extraction.

	    Only JSON *objects* are returned. Valid JSON of another type (list,
	    number, string, ...) is treated as "no object found" so callers can
	    rely on receiving a dict.

	    Parameters
	    ----------
	    response_str : str
	        Raw text produced by the agent.

	    Returns
	    -------
	    dict
	        The parsed object, or an empty dict when the input is empty or
	        contains no decodable JSON object.
	    """
	    if not response_str:
	        return {}

	    # Remove the fence markers themselves. The alternation must not be
	    # escaped: "\|" would match a literal pipe and never strip anything.
	    cleaned = re.sub(r"```(?:json)?", "", response_str, flags=re.IGNORECASE).strip()

	    # Fast path: the entire payload is JSON.
	    try:
	        parsed = json.loads(cleaned)
	    except json.JSONDecodeError:
	        parsed = None
	    if isinstance(parsed, dict):
	        return parsed

	    # Slow path: scan for an embedded object. raw_decode() stops at the end
	    # of the first complete JSON value, so trailing text is ignored.
	    decoder = json.JSONDecoder()
	    start = cleaned.find("{")
	    while start != -1:
	        try:
	            candidate, _ = decoder.raw_decode(cleaned, start)
	        except json.JSONDecodeError:
	            candidate = None
	        if isinstance(candidate, dict):
	            return candidate
	        start = cleaned.find("{", start + 1)

	    # Fallback: nothing usable was found.
	    return {}


	# ============================================================
	# ⚡ SPEEDUP CALCULATOR
	# ============================================================

	def compute_speedup(baseline_time: float, new_time: float) -> float:
	    """
	    Return how many times faster the new run is than the baseline.

	    The ratio is ``baseline_time / new_time``; e.g. a 1.0 s baseline and
	    a 0.1 s new time give 10.0.

	    Special cases (checked in this order):
	    - ``new_time == 0``             -> ``float("inf")``
	    - ``baseline_time`` is +infinity -> ``0.0``

	    Parameters
	    ----------
	    baseline_time : float
	        Execution time of the reference code, in seconds.
	    new_time : float
	        Execution time of the candidate code, in seconds.

	    Returns
	    -------
	    float
	        The speedup ratio.
	    """
	    infinity = float("inf")

	    # A zero denominator is reported as an unbounded speedup.
	    if new_time == 0:
	        return infinity

	    # A baseline of +infinity yields 0.0 rather than a ratio.
	    return 0.0 if baseline_time == infinity else baseline_time / new_time


	# ============================================================
	# 📊 PERFORMANCE REPORT FORMATTER
	# ============================================================

	def format_performance_report(
	    baseline_time: float,
	    new_time: float,
	    speedup: float
	) -> str:
	    """
	    Build a short, human-readable performance summary.

	    A time equal to +infinity is rendered as "Failed"; any other time is
	    printed with six decimal places. A speedup equal to +infinity is
	    rendered as "∞ (instant)"; any other speedup is printed with two
	    decimal places.

	    Example output:

	        Performance Report:
	        - Baseline Time: 0.523100 sec
	        - Optimized Time: 0.051200 sec
	        - Speedup: 10.22x faster

	    Parameters
	    ----------
	    baseline_time : float
	        Baseline execution time in seconds.
	    new_time : float
	        Optimized execution time in seconds.
	    speedup : float
	        Speedup ratio to display.

	    Returns
	    -------
	    str
	        The four-line report, without a trailing newline.
	    """
	    infinity = float("inf")

	    def _describe_time(seconds: float) -> str:
	        # Render one timing; +infinity is shown as "Failed".
	        return "Failed" if seconds == infinity else f"{seconds:.6f} sec"

	    baseline_text = _describe_time(baseline_time)
	    optimized_text = _describe_time(new_time)
	    speedup_text = "∞ (instant)" if speedup == infinity else f"{speedup:.2f}x faster"

	    return (
	        f"Performance Report:\n"
	        f"- Baseline Time: {baseline_text}\n"
	        f"- Optimized Time: {optimized_text}\n"
	        f"- Speedup: {speedup_text}"
	    )