# shinka-backup / scripts/dev/run_frontier_cs_parallel.sh
# Upload metadata (JustinTX): "Add files using upload-large-folder tool", commit 3f6526a (verified).
# NOTE(review): the lines above were unprefixed page-scrape residue sitting before the
# shebang, which made the file invalid as an executable script; kept as comments.
# Because the `#!` line below is no longer line 1, invoke explicitly with
# `bash scripts/dev/run_frontier_cs_parallel.sh` (or strip this header).
#!/bin/bash
# Parallel batch run: multiple Frontier-CS problems concurrently.
#
# Runs N problems in parallel without eval service (baseline mode).
# Each problem gets its own process with FRONTIER_CS_PROBLEM_ID env var.
#
# Usage:
# ./scripts/dev/run_frontier_cs_parallel.sh # problems 0-49, 20 parallel
# ./scripts/dev/run_frontier_cs_parallel.sh 0 49 20 # same, explicit
# ./scripts/dev/run_frontier_cs_parallel.sh 50 99 10 # problems 50-99, 10 parallel
#
# Positional arguments:
#   $1 - first problem ID, inclusive (default 0)
#   $2 - last problem ID, inclusive (default 49)
#   $3 - max number of concurrently running problems (default 20)
set -euo pipefail
# Run everything from the repository root (two levels up from this script).
cd "$(dirname "$0")/../.."
# Interpreter from the project virtualenv; assumed to exist relative to the repo root.
PYTHON=".venv/bin/python"
# ============================================================================
# Configuration
# ============================================================================
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"
GENS=50 # generations per experiment (passed to --num-generations)
SEED_MODEL="gemini3pro" # passed to --seed-model
# Passed unquoted to --llm-models below, so this may hold several
# space-separated model names.
LLM_MODELS="native-gemini-3-flash-preview"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
# One timestamped run directory collects results for all problems in the batch.
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"
mkdir -p "${LOG_DIR}" "${RUN_DIR}"
# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Fills the global PIDS array with every numeric directory name under
# PROBLEMS_DIR that lies in [PID_START, PID_END] and has a matching
# reference solution directory, in ascending numeric order.
# Globbing replaces the fragile `ls | sort -n` parse (ShellCheck SC2045);
# the old code also hid `[`'s errors on non-numeric names with 2>/dev/null.
collect_pids() {
  PIDS=()
  local entry pid
  for entry in "${PROBLEMS_DIR}"/*; do
    pid="${entry##*/}"
    # Only all-digit names are problem IDs; skip anything else (and the
    # literal pattern left behind when the glob matches nothing).
    [[ "${pid}" =~ ^[0-9]+$ ]] || continue
    # 10# forces base-10 so leading zeros are not parsed as octal.
    if (( 10#${pid} >= PID_START && 10#${pid} <= PID_END )) \
        && [[ -d "${SOLUTIONS_DIR}/${pid}" ]]; then
      PIDS+=("${pid}")
    fi
  done
  # Glob order is lexical ("10" before "2"); restore the numeric order the
  # old `sort -n` provided.
  if (( ${#PIDS[@]} > 1 )); then
    mapfile -t PIDS < <(printf '%s\n' "${PIDS[@]}" | sort -n)
  fi
}
collect_pids
# Announce the batch parameters before launching anything.
# Unquoted delimiter: ${...} expansions inside the here-doc are evaluated.
cat <<BANNER
========================================
Frontier-CS Parallel Batch (no eval agent)
========================================
 Problems: ${PID_START}-${PID_END} (${#PIDS[@]} valid)
 Concurrency: ${CONCURRENCY}
 Generations: ${GENS}
 Seed model: ${SEED_MODEL}
 LLM: ${LLM_MODELS}
 Run dir: ${RUN_DIR}
 Logs: ${LOG_DIR}/
========================================

BANNER
# ============================================================================
# Worker function: run one problem
# ============================================================================
# Runs one experiment end-to-end, logging to ${LOG_DIR}/problem_<pid>.log,
# prints a DONE/FAIL summary line and returns the experiment's exit status.
# Arguments: $1 - problem ID
run_problem() {
  local pid="$1"
  local status=0
  # Set env var so evaluator knows which problem to evaluate.
  export FRONTIER_CS_PROBLEM_ID="${pid}"
  # `|| status=$?` captures the exit code without tripping `set -e`.
  # (The old `local status=$?` on the next line was dead code: under -e a
  # failing command exits the function before $? is ever read, so the FAIL
  # branch could never run.)
  # LLM_MODELS is expanded unquoted on purpose so it can carry several
  # space-separated model names.
  ${PYTHON} tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs baseline "problem_${pid}" \
    --verbose \
    > "${LOG_DIR}/problem_${pid}.log" 2>&1 || status=$?
  if [ "${status}" -eq 0 ]; then
    echo "DONE problem ${pid}"
  else
    echo "FAIL problem ${pid} (exit ${status}, see ${LOG_DIR}/problem_${pid}.log)"
  fi
  return "${status}"
}
export -f run_problem
export PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR
# ============================================================================
# Start progress monitor in background
# ============================================================================
# Background monitor polls the run directory every 30 seconds.
${PYTHON} scripts/dev/monitor_frontier_cs.py \
--results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
# Double quotes are deliberate (would trip ShellCheck SC2064): MONITOR_PID is
# expanded *now*, so the EXIT trap kills the monitor started above even if the
# variable is later changed; `|| true` keeps the trap from failing under -e
# when the monitor already exited.
trap "kill ${MONITOR_PID} 2>/dev/null || true" EXIT
# ============================================================================
# Run in parallel using background jobs
# ============================================================================
RUNNING=0
DONE=0
FAILED=0
PIDS_RUNNING=() # bash PIDs of background jobs
PIDS_PROBLEM=() # problem IDs corresponding to background jobs

# Reap finished background jobs: keep only live jobs in PIDS_RUNNING /
# PIDS_PROBLEM and bump the DONE/FAILED counters for the rest.
# `wait` runs as an `if` condition so a failed job is *counted* — the old
# `wait PID; if [ $? -eq 0 ]` pattern aborted the whole script under
# `set -e` as soon as the first job exited non-zero.
reap_finished_jobs() {
  local i alive=0
  local -a live_pids=() live_problems=()
  for i in "${!PIDS_RUNNING[@]}"; do
    if kill -0 "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      alive=$((alive + 1))
      live_pids+=("${PIDS_RUNNING[$i]}")
      live_problems+=("${PIDS_PROBLEM[$i]}")
    elif wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      DONE=$((DONE + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  done
  RUNNING=${alive}
  # `${arr[@]+...}` guards the empty-array case under `set -u` (bash < 4.4).
  PIDS_RUNNING=("${live_pids[@]+"${live_pids[@]}"}")
  PIDS_PROBLEM=("${live_problems[@]+"${live_problems[@]}"}")
}

for PID in "${PIDS[@]+"${PIDS[@]}"}"; do
  # Wait if we've hit the concurrency limit.  A short poll replaces the old
  # `wait -n`: `wait -n` consumes one job's stored exit status, so the later
  # per-PID `wait` in the reaper returned 127 and misclassified that job as
  # FAILED.  One-second granularity is negligible for hour-long experiments.
  while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
    sleep 1
    reap_finished_jobs
  done
  # Launch new job.
  echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
  run_problem "${PID}" &
  PIDS_RUNNING+=("$!")
  PIDS_PROBLEM+=("${PID}")
  RUNNING=$((RUNNING + 1))
done
# Wait for remaining jobs
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
# Reap every job still tracked, recording its outcome in DONE/FAILED.
# `wait` runs as an `if` condition: the old `wait PID; if [ $? -eq 0 ]`
# pattern killed the script under `set -e` when a job exited non-zero,
# so the summary below was never printed after any failure.
collect_remaining() {
  local i
  for i in "${!PIDS_RUNNING[@]}"; do
    if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      DONE=$((DONE + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  done
}
collect_remaining
# Final tally for the whole batch.
# Unquoted delimiter: ${...} expansions inside the here-doc are evaluated.
cat <<SUMMARY

========================================
Parallel batch complete
 Succeeded: ${DONE}
 Failed: ${FAILED}
 Total: ${#PIDS[@]}
 Logs: ${LOG_DIR}/
========================================
SUMMARY