#!/bin/bash
#
# Frontier-CS smoke test.
#
# Runs tasks/frontier_cs_entry/run_experiment.py for problem ids 0 and 1,
# pointing each run at an eval service on localhost:${EVAL_PORT}. If nothing
# answers on that port, eval_agent/ev2_service_standalone.py is started in
# the background and is killed again when this script exits.
#
# Takes no arguments. All paths are relative to the directory two levels
# above this script.

set -euo pipefail
cd "$(dirname "$0")/../.."

# --- Configuration ----------------------------------------------------------
readonly PYTHON=".venv/bin/python"
readonly GENS=10        # passed as --num-generations
readonly PARALLEL=2     # passed as --max-parallel-jobs

readonly EVAL_PORT=8755
readonly EVAL_URL="http://localhost:${EVAL_PORT}"
readonly STARTUP_TIMEOUT_SECS=30   # max readiness polls, one per second

# PID of the eval service when this script launched it; stays empty when an
# already-running service is reused, so that we never kill a service we do
# not own.
EVAL_PID=""

#######################################
# Probe the eval service status endpoint.
# Globals:   EVAL_URL (read)
# Returns:   0 if curl got a response, non-zero otherwise.
#######################################
eval_service_up() {
  # --max-time keeps a wedged service (accepts the connection but never
  # answers) from hanging the whole script.
  curl -s --max-time 5 "${EVAL_URL}/api/v1/status" > /dev/null 2>&1
}

#######################################
# EXIT handler: stop the eval service if this script started it.
# Globals:   EVAL_PID (read)
#######################################
stop_eval_service() {
  if [[ -n "${EVAL_PID}" ]]; then
    echo 'Stopping eval service...'
    # Best effort: the process may already be gone.
    kill "${EVAL_PID}" 2>/dev/null || true
  fi
}

#######################################
# Launch the eval service in the background and wait until it responds.
# Exits the script with status 1 if the service does not come up.
# Globals:   PYTHON, EVAL_PORT, STARTUP_TIMEOUT_SECS (read), EVAL_PID (written)
#######################################
start_eval_service() {
  echo "Starting eval service on port ${EVAL_PORT}..."
  # NOTE(review): the two env vars presumably enable extra logging in the
  # service; their exact effect is not visible from this script.
  OPENHANDS_LOG_COMPLETIONS=1 ENABLE_FULL_TRAJECTORY_LOG=1 \
    "${PYTHON}" eval_agent/ev2_service_standalone.py \
    --host "0.0.0.0" --port "${EVAL_PORT}" &
  EVAL_PID=$!

  # Install cleanup right away so the service is also stopped when the
  # script is aborted while still waiting for readiness.
  trap stop_eval_service EXIT

  local attempt
  for ((attempt = 1; attempt <= STARTUP_TIMEOUT_SECS; attempt++)); do
    if eval_service_up; then
      echo "Eval service ready (pid=${EVAL_PID})"
      return 0
    fi
    # No point polling further once the service process itself has exited.
    if ! kill -0 "${EVAL_PID}" 2>/dev/null; then
      break
    fi
    sleep 1
  done

  # One last probe after the final sleep before giving up.
  if ! eval_service_up; then
    echo "ERROR: Eval service failed to start" >&2
    exit 1   # the EXIT trap kills the service process if it is still around
  fi
}

#######################################
# Run the experiment for a single problem.
# Globals:   PYTHON, GENS, PARALLEL, EVAL_URL (read)
# Arguments: $1 - problem id
#######################################
run_problem() {
  local problem_id=$1

  echo "----------------------------------------"
  echo "Problem ${problem_id} (${GENS} generations)"
  echo "----------------------------------------"
  "${PYTHON}" tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "smoke_p${problem_id}" \
    --problem-id "${problem_id}" \
    --seed-model gemini3pro \
    --num-generations "${GENS}" \
    --max-parallel-jobs "${PARALLEL}" \
    --use-eval-service \
    --eval-service-url "${EVAL_URL}" \
    --eval-trigger-mode periodic \
    --eval-trigger-interval 5
  echo ""
}

main() {
  # Reuse a service that is already listening; otherwise start our own.
  if eval_service_up; then
    echo "Eval service already running at ${EVAL_URL}"
  else
    start_eval_service
  fi

  echo "========================================"
  echo "Frontier-CS Smoke Test"
  echo "========================================"
  echo ""

  # Under `set -e`, a failing experiment run aborts the script here.
  local problem_id
  for problem_id in 0 1; do
    run_problem "${problem_id}"
  done

  echo "========================================"
  echo "Smoke test complete"
  echo "========================================"
}

main "$@"