Spaces:

Inzimam786
/

python-debugger-env

Runtime error

File size: 5,697 Bytes

fe64236

"""

utils.py

====================================



This module contains helper utilities for:



1. ⏱️ Performance measurement (deterministic timing)

2. 🧹 Parsing agent responses (clean JSON extraction)

3. ⚡ Speed comparison (optimization scoring)

4. 📊 Logging performance reports (feedback to agent)



🧠 WHY THIS MATTERS:

--------------------

In our environment, we are not just fixing code —

we are MEASURING how much better it becomes.



This is critical for:

- Reward shaping

- Fair evaluation

- Reproducibility (required by OpenEnv)



"""

import time
import json
import re
from typing import Any, Dict, Callable


# ============================================================
# ⏱️ SPEED MEASUREMENT
# ============================================================

def measure_execution_time(code_str: str, inputs: Dict[str, Any], iterations: int = 5) -> float:
    """
    Measure the best-case wall-clock time needed to run ``code_str``.

    The snippet is compiled once and then executed ``iterations`` times,
    each time in a fresh namespace seeded from ``inputs``. The MINIMUM
    observed time is returned, because the minimum is the sample least
    disturbed by background processes, OS scheduling and cache effects.

    ``time.perf_counter()`` is used instead of ``time.time()`` because it
    is a high-resolution, monotonic clock intended for benchmarking.

    The code runs in a SINGLE namespace (used as globals), exactly like a
    script. This matters: with separate globals/locals dicts, names defined
    at the top level of the snippet (and the supplied inputs) are invisible
    inside functions the snippet defines, so recursive functions or helpers
    calling other helpers would fail with NameError.

    Parameters:
    -----------
    code_str : str
        Python source code to execute.

    inputs : dict
        Variables made available to the executed code. The mapping itself
        is never modified (each run gets a shallow copy), but mutable
        VALUES are shared between runs, so code that mutates an input in
        place will see that mutation on later iterations.

    iterations : int
        Number of timed runs. Values <= 0 perform no runs.

    Returns:
    --------
    float -> minimum execution time in seconds, or ``float("inf")`` if the
    code fails to compile, raises during any run, or no run was performed.
    """
    # NOTE(security): exec() runs the snippet with full interpreter
    # privileges; it is NOT a sandbox. Only pass code that is trusted or
    # already isolated at the process/container level.

    # Compile once, outside the timed region, so that parsing cost is not
    # re-measured on every iteration. Invalid source counts as a failure.
    try:
        compiled = compile(code_str, "<measured_code>", "exec")
    except Exception:
        return float("inf")

    best_time = float("inf")

    for _ in range(iterations):
        # Fresh top-level namespace per run; the caller's dict is untouched.
        namespace = dict(inputs or {})

        start = time.perf_counter()
        try:
            # One dict serves as both globals and locals (script semantics).
            exec(compiled, namespace)
        except Exception:
            # If the code fails, treat it as infinitely slow.
            return float("inf")
        elapsed = time.perf_counter() - start

        if elapsed < best_time:
            best_time = elapsed

    return best_time


# ============================================================
# 🧹 JSON CLEANER
# ============================================================

def parse_agent_response(response_str: str) -> Dict[str, Any]:
    """
    Parse an agent (LLM) response into a JSON dictionary.

    LLMs often wrap JSON in markdown fences or surround it with prose:

        ```json
        {"key": "value"}
        ```

    Parsing strategy, in order:

    1. Parse the raw text as-is. This keeps payloads intact when a string
       value legitimately contains backticks (e.g. a code snippet).
    2. Strip markdown fence markers and parse again.
    3. Scan for the first position where a complete JSON object can be
       decoded, so trailing prose or stray braces do not break extraction.

    Only JSON *objects* are accepted; a top-level list, number or string
    is not returned, so callers can always treat the result as a dict.

    Parameters:
    -----------
    response_str : str
        Raw text returned by the agent.

    Returns:
    --------
    dict -> the decoded JSON object, or ``{}`` when the input is empty,
    not a string, or contains no decodable JSON object.
    """
    if not response_str or not isinstance(response_str, str):
        return {}

    raw = response_str.strip()
    # Same fence-stripping rule as before, but only used as a second attempt.
    unfenced = re.sub(r"```json|```", "", response_str, flags=re.IGNORECASE).strip()

    candidates = [raw]
    if unfenced != raw:
        candidates.append(unfenced)

    # Attempts 1 and 2: the whole candidate is a JSON document.
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Attempt 3: find an embedded object. raw_decode stops at the end of the
    # first complete JSON value, so text after the object is ignored.
    decoder = json.JSONDecoder()
    for candidate in candidates:
        position = candidate.find("{")
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(candidate, position)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            position = candidate.find("{", position + 1)

    # Fallback: nothing usable was found.
    return {}


# ============================================================
# ⚡ SPEEDUP CALCULATOR
# ============================================================

def compute_speedup(baseline_time: float, new_time: float) -> float:
    """
    Return how many times faster the new run is than the baseline.

    Formula:
        speedup = baseline_time / new_time

    Example:
    --------
    baseline = 1.0s, new = 0.1s -> speedup = 10.0 (10x faster)

    Edge cases (checked in this order):
    -----------------------------------
    - new_time == 0          -> float("inf")  (avoids division by zero)
    - baseline_time == inf   -> 0.0           (baseline run failed)

    Returns:
    --------
    float -> speedup ratio
    """
    infinity = float("inf")

    # A zero denominator takes precedence over every other case.
    if new_time == 0:
        return infinity

    # A failed baseline gives no meaningful ratio; report zero speedup.
    return 0.0 if baseline_time == infinity else baseline_time / new_time


# ============================================================
# 📊 PERFORMANCE REPORT FORMATTER
# ============================================================

def format_performance_report(baseline_time: float, new_time: float, speedup: float) -> str:
    """
    Build the human-readable performance report shown to the agent.

    The report is a four-line string: a header followed by one bullet each
    for the baseline time, the optimized time and the speedup.

    Formatting rules:
    -----------------
    - A time equal to float("inf") is rendered as "Failed"; any other
      time is rendered with six decimal places and a " sec" suffix.
    - A speedup equal to float("inf") is rendered as "∞ (instant)"; any
      other speedup is rendered with two decimals and an "x faster" suffix.

    Example Output:
    ---------------
    Performance Report:
    - Baseline Time: 0.523100 sec
    - Optimized Time: 0.051200 sec
    - Speedup: 10.21x faster

    Returns:
    --------
    str
    """
    infinity = float("inf")

    def _describe_time(seconds: float) -> str:
        # An infinite time is the sentinel for "the run failed".
        return "Failed" if seconds == infinity else f"{seconds:.6f} sec"

    baseline_text = _describe_time(baseline_time)
    optimized_text = _describe_time(new_time)
    speedup_text = "∞ (instant)" if speedup == infinity else f"{speedup:.2f}x faster"

    lines = [
        "Performance Report:",
        f"- Baseline Time: {baseline_text}",
        f"- Optimized Time: {optimized_text}",
        f"- Speedup: {speedup_text}",
    ]
    return "\n".join(lines)