#!/bin/bash
#
# Frontier-CS algorithmic batch runner: runs the experiment entry point for a
# range of problem ids, several problems at a time.
#
# Usage: <this script> [PID_START] [PID_END] [CONCURRENCY]
#   PID_START    lowest problem id to run (default: 0)
#   PID_END      highest problem id to run, inclusive (default: 49)
#   CONCURRENCY  maximum number of problems running at once (default: 20)

set -euo pipefail
# Every relative path below is resolved from two directories above this script.
cd "$(dirname "$0")/../.."

PYTHON=".venv/bin/python"

# ---------------------------------------------------------------------------
# Command-line arguments
# ---------------------------------------------------------------------------
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"

# Reject bad values up front: the range test further down hides comparison
# errors, so a non-numeric bound would otherwise just select no problems.
for bound in "${PID_START}" "${PID_END}"; do
  if ! [[ "${bound}" =~ ^-?[0-9]+$ ]]; then
    echo "error: PID_START and PID_END must be integers (got '${bound}')" >&2
    exit 2
  fi
done
# A concurrency of 0 would leave the scheduler waiting for a free slot forever.
if ! [[ "${CONCURRENCY}" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "error: CONCURRENCY must be a positive integer (got '${CONCURRENCY}')" >&2
  exit 2
fi

# ---------------------------------------------------------------------------
# Experiment settings
# ---------------------------------------------------------------------------
GENS=50
SEED_MODEL="gemini3pro"
LLM_MODELS="native-gemini-3-flash-preview"

# Each invocation gets its own timestamped results directory.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"

PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"
mkdir -p "${LOG_DIR}" "${RUN_DIR}"

# ---------------------------------------------------------------------------
# Collect the problem ids to run: numerically named entries of PROBLEMS_DIR
# that fall inside [PID_START, PID_END] and have a directory of the same name
# under SOLUTIONS_DIR. Ids are kept in ascending numeric order.
# ---------------------------------------------------------------------------
if [ ! -d "${PROBLEMS_DIR}" ]; then
  echo "error: problems directory not found: ${PROBLEMS_DIR}" >&2
  exit 1
fi

PIDS=()
while IFS= read -r pid; do
  # `[` compares in base 10, so ids with leading zeros (e.g. 08) are fine.
  # Comparison errors are silenced: a non-numeric bound simply skips the id.
  if [ "${pid}" -ge "${PID_START}" ] 2>/dev/null \
    && [ "${pid}" -le "${PID_END}" ] 2>/dev/null \
    && [ -d "${SOLUTIONS_DIR}/${pid}" ]; then
    PIDS+=("${pid}")
  fi
done < <(
  # Glob instead of parsing `ls`; only all-digit names are passed on.
  for entry in "${PROBLEMS_DIR}"/*; do
    name="${entry##*/}"
    if [[ "${name}" =~ ^[0-9]+$ ]]; then
      printf '%s\n' "${name}"
    fi
  done | sort -n
)

# ---------------------------------------------------------------------------
# Print the run configuration before any job is started.
# ---------------------------------------------------------------------------
printf '%s\n' \
  "========================================" \
  "Frontier-CS Parallel Batch (no eval agent)" \
  "========================================" \
  " Problems: ${PID_START}-${PID_END} (${#PIDS[@]} valid)" \
  " Concurrency: ${CONCURRENCY}" \
  " Generations: ${GENS}" \
  " Seed model: ${SEED_MODEL}" \
  " LLM: ${LLM_MODELS}" \
  " Run dir: ${RUN_DIR}" \
  " Logs: ${LOG_DIR}/" \
  "========================================" \
  ""

#######################################
# Run the experiment for a single problem and print a one-line verdict.
# Globals:
#   PYTHON, EXP_NAME, SEED_MODEL, GENS, LLM_MODELS, RUN_DIR, LOG_DIR (read)
#   FRONTIER_CS_PROBLEM_ID (exported, set to the problem id)
# Arguments:
#   $1 - problem id
# Outputs:
#   "DONE problem <id>" or "FAIL problem <id> (...)" on stdout; stdout and
#   stderr of the python process go to ${LOG_DIR}/problem_<id>.log
# Returns:
#   the exit status of the python process
#######################################
run_problem() {
  local pid="$1"
  local log_file="${LOG_DIR}/problem_${pid}.log"
  local status=0

  export FRONTIER_CS_PROBLEM_ID="${pid}"

  # `|| status=$?` records a failure instead of letting `set -e` end the
  # (sub)shell on the spot, which would skip the FAIL report below.
  # LLM_MODELS stays unquoted so a space-separated value is passed as several
  # arguments.
  # shellcheck disable=SC2086
  "${PYTHON}" tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs baseline "problem_${pid}" \
    --verbose \
    > "${log_file}" 2>&1 || status=$?

  if [ "${status}" -eq 0 ]; then
    echo "DONE problem ${pid}"
  else
    echo "FAIL problem ${pid} (exit ${status}, see ${log_file})"
  fi
  return "${status}"
}

# Make run_problem and the settings it reads visible to child bash processes.
declare -fx run_problem
declare -x PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR

# ---------------------------------------------------------------------------
# Start the results monitor in the background; it is killed when this script
# exits.
# ---------------------------------------------------------------------------
stop_monitor() {
  # The monitor may already be gone; that is not an error.
  kill "${MONITOR_PID}" 2>/dev/null || true
}

${PYTHON} scripts/dev/monitor_frontier_cs.py --results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
trap stop_monitor EXIT

# ---------------------------------------------------------------------------
# Launch one background job per problem, never more than CONCURRENCY at once.
# ---------------------------------------------------------------------------
RUNNING=0        # jobs currently tracked as alive
DONE=0           # jobs that exited with status 0
FAILED=0         # jobs that exited with a non-zero status
PIDS_RUNNING=()  # process ids of the tracked jobs
PIDS_PROBLEM=()  # problem id of each job, index-aligned with PIDS_RUNNING

# The ${arr[@]+...} form keeps an empty list from tripping `set -u` on older
# bash versions.
for PID in ${PIDS[@]+"${PIDS[@]}"}; do
  # At capacity: block until some job ends, then rebuild the bookkeeping.
  while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
    # Only used to sleep until a child exits; statuses are collected below.
    wait -n 2>/dev/null || true

    alive_pids=()
    alive_problems=()
    for slot in "${!PIDS_RUNNING[@]}"; do
      job_pid="${PIDS_RUNNING[${slot}]}"
      if kill -0 "${job_pid}" 2>/dev/null; then
        alive_pids+=("${job_pid}")
        alive_problems+=("${PIDS_PROBLEM[${slot}]}")
      elif wait "${job_pid}" 2>/dev/null; then
        # `wait` is tested directly: as a bare command, the non-zero status
        # of a failed job would abort the whole script under `set -e`.
        DONE=$((DONE + 1))
      else
        FAILED=$((FAILED + 1))
      fi
    done
    RUNNING=${#alive_pids[@]}
    PIDS_RUNNING=(${alive_pids[@]+"${alive_pids[@]}"})
    PIDS_PROBLEM=(${alive_problems[@]+"${alive_problems[@]}"})
  done

  echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
  run_problem "${PID}" &
  PIDS_RUNNING+=($!)
  PIDS_PROBLEM+=("${PID}")
  RUNNING=$((RUNNING + 1))
done

# ---------------------------------------------------------------------------
# Everything has been launched; collect the jobs that are still tracked.
# ---------------------------------------------------------------------------
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
for job_pid in ${PIDS_RUNNING[@]+"${PIDS_RUNNING[@]}"}; do
  # `wait` is tested directly: as a bare command, the non-zero status of a
  # failed job would abort the script under `set -e` before it is counted.
  if wait "${job_pid}" 2>/dev/null; then
    DONE=$((DONE + 1))
  else
    FAILED=$((FAILED + 1))
  fi
done

# ---------------------------------------------------------------------------
# Final report.
# ---------------------------------------------------------------------------
echo ""
echo "========================================"
echo "Parallel batch complete"
echo " Succeeded: ${DONE}"
echo " Failed: ${FAILED}"
echo " Total: ${#PIDS[@]}"
echo " Logs: ${LOG_DIR}/"
echo "========================================"

# Let callers detect a partially failed batch from the exit status.
if [ "${FAILED}" -gt 0 ]; then
  exit 1
fi