Spaces:
Sleeping
Sleeping
havinashpatil
Finalizing CodeArena RL Benchmark: frontend improvements, GRPO training scripts, and cleaned environment
03a7eb9 | """ | |
| CodeArena Agent Memory | |
| Self-improving memory across episodes. | |
| Stores best solutions per task + retrieves them to seed future fixes. | |
| """ | |
| import json | |
| import os | |
| import csv | |
| import time | |
| from typing import Optional | |
# Both data files live one directory above this module.
_PARENT_DIR = os.path.join(os.path.dirname(__file__), os.pardir)

# JSON file holding the best known solution per task.
MEMORY_FILE = os.path.join(_PARENT_DIR, "agent_memory.json")
# CSV log of (complexity, reward) observations.
CSV_FILE = os.path.join(_PARENT_DIR, "complexity_rewards.csv")
# ── Memory Store ──────────────────────────────────────────────────────────────
def load_memory() -> dict:
    """Load agent memory from disk.

    Returns:
        The mapping parsed from ``MEMORY_FILE``. An empty dict is returned
        when the file does not exist, cannot be read or parsed, or does not
        contain a JSON object at the top level.
    """
    try:
        # Decode explicitly instead of relying on the platform default; JSON
        # produced with json's default settings is plain ASCII, so this is
        # compatible with existing files.
        with open(MEMORY_FILE, "r", encoding="utf-8") as f:
            memory = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet; this is the normal first-run case.
        return {}
    except Exception as e:
        # Deliberately best-effort: a corrupt or unreadable memory file must
        # not crash the caller, it just means "no memory".
        print(f"[Memory] Load error: {e}")
        return {}

    # Callers use dict methods on the result, so reject any other JSON root
    # (list, string, number, null) instead of handing it back.
    if not isinstance(memory, dict):
        print(
            "[Memory] Load error: expected a JSON object, "
            f"got {type(memory).__name__}"
        )
        return {}
    return memory
def save_memory(memory: dict) -> None:
    """Persist agent memory to disk.

    The mapping is serialized completely before any file is touched, written
    to a temporary sibling file, and then moved over ``MEMORY_FILE``. A
    serialization error or an interrupted write therefore cannot leave the
    existing memory file truncated or half-written.

    Args:
        memory: JSON-serializable mapping to store.

    Errors are reported on stdout and otherwise swallowed (best-effort).
    """
    tmp_path = f"{MEMORY_FILE}.tmp"
    try:
        # Serialize first: if a value is not JSON-serializable we fail here,
        # before the previous file contents are at risk.
        payload = json.dumps(memory, indent=2)
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        # os.replace overwrites the destination in a single step.
        os.replace(tmp_path, MEMORY_FILE)
    except Exception as e:
        print(f"[Memory] Save error: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # The temp file was never created or is already gone.
            pass
def store_success(task_id: str, code: str, reward: float) -> None:
    """
    Record ``code`` as the best solution for ``task_id``.

    Nothing is written unless the task has no entry yet or ``reward`` is
    strictly greater than the stored reward, so only the BEST solution per
    task is kept.
    """
    memory = load_memory()
    previous = memory.get(task_id)

    # Keep the existing entry unless the new reward strictly beats it.
    if previous is not None and not reward > previous.get("reward", 0):
        return

    record = {
        "best_code": code,
        "reward": round(reward, 4),
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    memory[task_id] = record
    save_memory(memory)
    print(f"[Memory] Stored new best for '{task_id}' with reward={reward:.3f}")
def retrieve_memory(task_id: str) -> Optional[dict]:
    """
    Look up the stored entry for ``task_id``.

    Returns the entry saved for that task (a dict with 'best_code' and
    'reward'), or None when the task has no entry.
    """
    return load_memory().get(task_id)
def get_all_memories() -> dict:
    """Return every stored task memory (for dashboard display)."""
    all_memories = load_memory()
    return all_memories
# ── Complexity vs Reward CSV Logger ───────────────────────────────────────────
def log_complexity_reward(
    task_id: str,
    reward: float,
    complexity: str,
    step: int,
    method: str = "ollama",
) -> None:
    """
    Append one row to complexity_rewards.csv.

    Used to track: better algorithms -> better rewards. A header row is
    written first when the file did not exist or is empty. I/O errors are
    printed and swallowed.
    """
    row = dict(
        timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        task_id=task_id,
        reward=round(reward, 4),
        complexity=complexity,
        step=step,
        method=method,
    )

    try:
        existed_before = os.path.exists(CSV_FILE)
        with open(CSV_FILE, "a", newline="") as handle:
            writer = csv.DictWriter(handle, fieldnames=row.keys())
            # A brand-new or empty file still needs its header line.
            needs_header = (not existed_before) or handle.tell() == 0
            if needs_header:
                writer.writeheader()
            writer.writerow(row)
    except Exception as err:
        print(f"[Memory] CSV log error: {err}")
def get_complexity_reward_stats() -> dict:
    """
    Read the CSV log and compute the average reward per complexity class.

    Rows whose reward cannot be parsed as a number (short rows, repeated
    header lines, empty cells) are skipped, so one bad line does not discard
    the statistics of every other row. A row with a missing complexity value
    is counted under "unknown".

    Returns dict like: {"O(n)": 0.88, "O(n^2)": 0.55, "O(n^3)": 0.12},
    or {} when the file is missing or cannot be read.
    """
    rewards_by_complexity: dict[str, list] = {}
    try:
        # newline="" lets the csv module handle line endings itself.
        with open(CSV_FILE, "r", newline="") as f:
            for row in csv.DictReader(f):
                try:
                    reward = float(row.get("reward", 0))
                except (TypeError, ValueError):
                    # TypeError: short row, DictReader filled the cell with
                    # None. ValueError: non-numeric text such as a repeated
                    # header line or an empty cell.
                    continue
                complexity = row.get("complexity")
                if complexity is None:
                    complexity = "unknown"
                rewards_by_complexity.setdefault(complexity, []).append(reward)
    except FileNotFoundError:
        # Nothing has been logged yet.
        return {}
    except Exception as e:
        print(f"[Memory] Stats error: {e}")
        return {}

    return {
        complexity: round(sum(rewards) / len(rewards), 3)
        for complexity, rewards in rewards_by_complexity.items()
    }