Spaces:

md896
/

sql-debug-env

Running

App Files Files Community

sql-debug-env / scripts /benchmark_local.py

md896

Initial OpenEnv SQL debug environment

30cf758 about 1 month ago

raw

history blame contribute delete

1.88 kB

	"""
	Lightweight local benchmark for sql-debug-env.

	Runs deterministic endpoint checks and prints simple latency metrics.
	No LLM key required.
	"""
	from __future__ import annotations

	import statistics
	import time
	from typing import Dict, List

	import httpx


	# Base URL handed to the httpx client in main(); request paths are resolved against it.
	BASE_URL = "http://localhost:7860"


	def timed_call(client: httpx.Client, method: str, path: str, json_body: Dict | None = None) -> float:
	    """Send one HTTP request and return how long it took, in milliseconds.

	    Args:
	        client: HTTP client used to send the request.
	        method: ``"GET"`` issues a GET request; any other value issues a POST.
	        path: Request path passed straight to the client.
	        json_body: JSON payload sent with POST requests; ignored for GET.

	    Returns:
	        Elapsed wall-clock time in milliseconds, measured with
	        ``time.perf_counter``. The status check is inside the timed window.

	    Raises:
	        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
	    """
	    start = time.perf_counter()
	    if method == "GET":
	        response = client.get(path)
	    else:
	        response = client.post(path, json=json_body)
	    # Fail loudly on error responses so failed calls never count as samples.
	    response.raise_for_status()
	    return (time.perf_counter() - start) * 1000


	def summarize(samples: List[float]) -> str:
	    """Format mean, median and 95th-percentile latency for a list of samples.

	    Args:
	        samples: Latency measurements in milliseconds.

	    Returns:
	        A string of the form ``avg=…ms p50=…ms p95=…ms n=…`` with two
	        decimal places per statistic.

	    Raises:
	        statistics.StatisticsError: If ``samples`` is empty.
	    """
	    ordered = sorted(samples)
	    count = len(ordered)
	    p50 = statistics.median(ordered)
	    # Nearest-rank percentile: the smallest sample with at least 95% of all
	    # samples at or below it. ceil(0.95 * count) is computed in integers so
	    # float rounding cannot shift the rank; truncating instead under-reports
	    # the tail (for two samples it would return the minimum).
	    rank = (95 * count + 99) // 100
	    p95 = ordered[rank - 1]
	    avg = statistics.mean(samples)
	    return f"avg={avg:.2f}ms p50={p50:.2f}ms p95={p95:.2f}ms n={count}"


	def main() -> None:
	    """Benchmark the local server's endpoints and print one latency line per endpoint.

	    Sends one untimed ``GET /health`` first, then 25 timed ``GET /health``
	    calls, 25 timed ``GET /tasks`` calls, and 25 rounds of ``POST /reset``
	    followed by ``POST /step``. Any error response aborts the run.
	    """
	    rounds = 25
	    health_ms: List[float] = []
	    tasks_ms: List[float] = []
	    reset_ms: List[float] = []
	    step_ms: List[float] = []
	    with httpx.Client(base_url=BASE_URL, timeout=30.0) as http:
	        # Untimed first request: warms the connection and confirms the server is up.
	        warmup = http.get("/health")
	        warmup.raise_for_status()
	        for _ in range(rounds):
	            health_ms.append(timed_call(http, "GET", "/health"))
	        for _ in range(rounds):
	            tasks_ms.append(timed_call(http, "GET", "/tasks"))
	        # Each /step is issued right after a /reset of the same task.
	        for _ in range(rounds):
	            reset_elapsed = timed_call(http, "POST", "/reset", {"task_id": "easy_syntax_fix"})
	            reset_ms.append(reset_elapsed)
	            step_elapsed = timed_call(
	                http, "POST", "/step", {"action": {"action_type": "inspect_schema"}}
	            )
	            step_ms.append(step_elapsed)
	        report = [
	            ("GET /health", health_ms),
	            ("GET /tasks", tasks_ms),
	            ("POST /reset", reset_ms),
	            ("POST /step (inspect_schema)", step_ms),
	        ]
	        print("Benchmark results (local)")
	        for label, timings in report:
	            print(f"{label}: {summarize(timings)}")


	# Run the benchmark only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()