# Upload metadata (page residue, not part of the module):
# uploaded by JustinTX - "Add files using upload-large-folder tool"
# commit 3f6526a (verified)
"""Public toolbox APIs exposed to auxiliary metric code.
Design goal:
- keep provider details hidden behind internal adapters
- expose simple, controlled text APIs to the agent
"""
from __future__ import annotations
import json
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
def _usage_file(results_dir: Optional[str]) -> Optional[Path]:
if not results_dir:
return None
p = Path(results_dir).resolve()
# expected shape: <experiment>/gen_x/results
experiment_root = p.parent.parent if p.name == "results" and p.parent.name.startswith("gen_") else p
return experiment_root / "eval_agent_memory" / "tool_usage.json"
def _load_usage(path: Path) -> Dict[str, Any]:
try:
if path.exists():
with open(path) as f:
data = json.load(f)
if isinstance(data, dict):
return data
except Exception:
pass
return {}
def _save_usage(path: Path, data: Dict[str, Any]) -> None:
try:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w") as f:
json.dump(data, f, indent=2)
except Exception:
# Usage logging should never break evaluation.
pass
def _check_and_record_quota(tool: str, results_dir: Optional[str], limit: int) -> tuple[bool, str]:
    """Check the per-experiment call quota for ``tool`` and record one call.

    Args:
        tool: Tool name; its counter is stored under ``"<tool>_calls"``.
        results_dir: Directory used to locate the usage ledger. When it is
            empty or ``None`` no ledger exists and the call is always allowed.
        limit: Maximum number of recorded calls allowed for ``tool``.

    Returns:
        ``(allowed, note)``. ``note`` is ``"no_results_dir"``,
        ``"quota_exceeded:<tool>:<count>/<limit>"`` or
        ``"ok:<tool>:<new_count>/<limit>"``.

    A ledger that parses as JSON but has an unexpected shape (``"counters"``
    is not a dict, or the stored count is not a number) is treated as having
    no recorded calls instead of raising, in the same way an unreadable
    ledger is treated as empty by the loader.
    """
    usage_path = _usage_file(results_dir)
    if usage_path is None:
        return True, "no_results_dir"

    usage = _load_usage(usage_path)
    counters = usage.get("counters")
    if not isinstance(counters, dict):
        # Malformed or missing section: start a fresh counter table.
        counters = {}
        usage["counters"] = counters

    key = f"{tool}_calls"
    try:
        count = int(counters.get(key, 0))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric value (e.g. null, text, Infinity) stored in the ledger.
        count = 0

    if count >= limit:
        return False, f"quota_exceeded:{tool}:{count}/{limit}"

    counters[key] = count + 1
    usage["last_update"] = time.time()
    _save_usage(usage_path, usage)
    return True, f"ok:{tool}:{count + 1}/{limit}"
def _env_int(name: str, default: int) -> int:
    """Return env var ``name`` as an int, or ``default`` if unset or malformed."""
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


def call_vision(
    text: str,
    image_paths: List[str],
    *,
    results_dir: Optional[str] = None,
) -> str:
    """Call hidden vision backend and return textual output.

    Controls:
    - EVAL_TOOLBOX_VISION_MAX_CALLS (default: 2)
    - EVAL_TOOLBOX_VISION_MAX_IMAGES (default: 2)

    A control variable that is not a valid integer falls back to its
    default, and a negative image cap is treated as 0.

    Args:
        text: Prompt forwarded to the backend.
        image_paths: Candidate image paths. Only the first
            ``EVAL_TOOLBOX_VISION_MAX_IMAGES`` entries are considered, and of
            those only the ones that exist on disk are sent.
        results_dir: Directory used to locate the quota ledger. Without it
            the quota check always allows the call.

    Returns:
        The backend's return value, or a string starting with
        ``"TOOL_ERROR: "`` when the quota is exhausted, no usable image is
        available, or the backend import/call raises.
    """
    max_calls = _env_int("EVAL_TOOLBOX_VISION_MAX_CALLS", 2)
    # Clamp: a negative cap would make the slice below mean "all but the last k".
    max_images = max(0, _env_int("EVAL_TOOLBOX_VISION_MAX_IMAGES", 2))

    # The quota is recorded before the inputs are validated.
    ok, note = _check_and_record_quota("vision", results_dir, max_calls)
    if not ok:
        return f"TOOL_ERROR: {note}"

    selected = (image_paths or [])[:max_images]
    if not selected:
        return "TOOL_ERROR: no_images"

    # Skip entries that are not path-like instead of letting Path() raise.
    existing = [
        p
        for p in selected
        if isinstance(p, (str, os.PathLike)) and Path(p).exists()
    ]
    if not existing:
        return "TOOL_ERROR: image_not_found"

    try:
        # Lazy import so toolbox remains importable without vision deps.
        from ._internal.vision_gemini import gemini_vision_chat

        return gemini_vision_chat(prompt=text, image_paths=existing)
    except Exception as e:
        return f"TOOL_ERROR: vision_exception:{e}"
def call_tool(name: str, payload: Dict[str, Any]) -> str:
    """Generic toolbox API entrypoint for future tools.

    Args:
        name: Tool name; matched case-insensitively after stripping
            whitespace. ``None`` is treated as an empty name.
        payload: Tool arguments. For ``"vision"`` the keys ``"text"``,
            ``"image_paths"`` and ``"results_dir"`` are read.
            ``"image_paths"`` may be a list of paths or a single path, which
            is wrapped in a list rather than split into characters.

    Returns:
        The tool's textual result, or a ``"TOOL_ERROR: ..."`` string for an
        unknown tool, a payload that is not a dict, or an ``"image_paths"``
        value that cannot be turned into a list.
    """
    name = str(name or "").strip().lower()
    if name == "vision":
        if not isinstance(payload, dict):
            return "TOOL_ERROR: invalid_payload"

        raw_text = payload.get("text")
        raw_paths = payload.get("image_paths")
        if raw_paths is None:
            paths = []
        elif isinstance(raw_paths, (str, os.PathLike)):
            # list("a.png") would yield single characters, so wrap instead.
            paths = [os.fspath(raw_paths)]
        else:
            try:
                paths = list(raw_paths)
            except TypeError:
                return "TOOL_ERROR: invalid_image_paths"

        return call_vision(
            text="" if raw_text is None else str(raw_text),
            image_paths=paths,
            results_dir=payload.get("results_dir"),
        )
    return f"TOOL_ERROR: unknown_tool:{name}"