# shinka-backup / scripts/dev/run_frontier_cs_parallel.sh
# Upload metadata (JustinTX): "Add files using upload-large-folder tool", commit 3f6526a (verified).
# NOTE(review): the lines above were unprefixed page-scrape residue sitting before the
# shebang, which made the file invalid as an executable script; kept as comments.
# Because the `#!` line below is no longer line 1, invoke explicitly with
# `bash scripts/dev/run_frontier_cs_parallel.sh` (or strip this header).
#!/bin/bash
# Parallel batch run: multiple Frontier-CS problems concurrently.
#
# Runs N problems in parallel without eval service (baseline mode).
# Each problem gets its own process with FRONTIER_CS_PROBLEM_ID env var.
#
# Usage:
# ./scripts/dev/run_frontier_cs_parallel.sh # problems 0-49, 20 parallel
# ./scripts/dev/run_frontier_cs_parallel.sh 0 49 20 # same, explicit
# ./scripts/dev/run_frontier_cs_parallel.sh 50 99 10 # problems 50-99, 10 parallel
#
# Positional arguments:
#   $1 - first problem ID, inclusive (default 0)
#   $2 - last problem ID, inclusive (default 49)
#   $3 - max number of concurrently running problems (default 20)
set -euo pipefail
# Run everything from the repository root (two levels up from this script).
cd "$(dirname "$0")/../.."
# Interpreter from the project virtualenv; assumed to exist relative to the repo root.
PYTHON=".venv/bin/python"
# ============================================================================
# Configuration
# ============================================================================
PID_START="${1:-0}"
PID_END="${2:-49}"
CONCURRENCY="${3:-20}"
GENS=50 # generations per experiment (passed to --num-generations)
SEED_MODEL="gemini3pro" # passed to --seed-model
# Passed unquoted to --llm-models below, so this may hold several
# space-separated model names.
LLM_MODELS="native-gemini-3-flash-preview"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EXP_NAME="vanilla_g${GENS}"
# One timestamped run directory collects results for all problems in the batch.
RUN_DIR="results/frontier_cs_algorithmic/${EXP_NAME}_${TIMESTAMP}"
PROBLEMS_DIR="tasks/Frontier-CS/algorithmic/problems"
SOLUTIONS_DIR="tasks/Frontier-CS/algorithmic/solutions"
LOG_DIR="logs/frontier_cs_parallel"
mkdir -p "${LOG_DIR}" "${RUN_DIR}"
# ============================================================================
# Collect valid problem IDs
# ============================================================================
# Fills the global PIDS array with every numeric directory name under
# PROBLEMS_DIR that lies in [PID_START, PID_END] and has a matching
# reference solution directory, in ascending numeric order.
# Globbing replaces the fragile `ls | sort -n` parse (ShellCheck SC2045);
# the old code also hid `[`'s errors on non-numeric names with 2>/dev/null.
collect_pids() {
  PIDS=()
  local entry pid
  for entry in "${PROBLEMS_DIR}"/*; do
    pid="${entry##*/}"
    # Only all-digit names are problem IDs; skip anything else (and the
    # literal pattern left behind when the glob matches nothing).
    [[ "${pid}" =~ ^[0-9]+$ ]] || continue
    # 10# forces base-10 so leading zeros are not parsed as octal.
    if (( 10#${pid} >= PID_START && 10#${pid} <= PID_END )) \
        && [[ -d "${SOLUTIONS_DIR}/${pid}" ]]; then
      PIDS+=("${pid}")
    fi
  done
  # Glob order is lexical ("10" before "2"); restore the numeric order the
  # old `sort -n` provided.
  if (( ${#PIDS[@]} > 1 )); then
    mapfile -t PIDS < <(printf '%s\n' "${PIDS[@]}" | sort -n)
  fi
}
collect_pids
# Announce the batch parameters before launching anything.
# Unquoted delimiter: ${...} expansions inside the here-doc are evaluated.
cat <<BANNER
========================================
Frontier-CS Parallel Batch (no eval agent)
========================================
 Problems: ${PID_START}-${PID_END} (${#PIDS[@]} valid)
 Concurrency: ${CONCURRENCY}
 Generations: ${GENS}
 Seed model: ${SEED_MODEL}
 LLM: ${LLM_MODELS}
 Run dir: ${RUN_DIR}
 Logs: ${LOG_DIR}/
========================================

BANNER
# ============================================================================
# Worker function: run one problem
# ============================================================================
# Runs one experiment end-to-end, logging to ${LOG_DIR}/problem_<pid>.log,
# prints a DONE/FAIL summary line and returns the experiment's exit status.
# Arguments: $1 - problem ID
run_problem() {
  local pid="$1"
  local status=0
  # Set env var so evaluator knows which problem to evaluate.
  export FRONTIER_CS_PROBLEM_ID="${pid}"
  # `|| status=$?` captures the exit code without tripping `set -e`.
  # (The old `local status=$?` on the next line was dead code: under -e a
  # failing command exits the function before $? is ever read, so the FAIL
  # branch could never run.)
  # LLM_MODELS is expanded unquoted on purpose so it can carry several
  # space-separated model names.
  ${PYTHON} tasks/frontier_cs_entry/run_experiment.py \
    --experiment-name "${EXP_NAME}" \
    --problem-id "${pid}" \
    --seed-model "${SEED_MODEL}" \
    --num-generations "${GENS}" \
    --max-parallel-jobs 1 \
    --edit-backend single_shot_patch \
    --llm-models ${LLM_MODELS} \
    --run-dir "${RUN_DIR}" \
    --use-wandb \
    --wandb-project frontier-cs \
    --wandb-tags frontier_cs baseline "problem_${pid}" \
    --verbose \
    > "${LOG_DIR}/problem_${pid}.log" 2>&1 || status=$?
  if [ "${status}" -eq 0 ]; then
    echo "DONE problem ${pid}"
  else
    echo "FAIL problem ${pid} (exit ${status}, see ${LOG_DIR}/problem_${pid}.log)"
  fi
  return "${status}"
}
export -f run_problem
export PYTHON GENS EXP_NAME SEED_MODEL LLM_MODELS LOG_DIR RUN_DIR
# ============================================================================
# Start progress monitor in background
# ============================================================================
# Background monitor polls the run directory every 30 seconds.
${PYTHON} scripts/dev/monitor_frontier_cs.py \
--results-dir "${RUN_DIR}" --interval 30 &
MONITOR_PID=$!
# Double quotes are deliberate (would trip ShellCheck SC2064): MONITOR_PID is
# expanded *now*, so the EXIT trap kills the monitor started above even if the
# variable is later changed; `|| true` keeps the trap from failing under -e
# when the monitor already exited.
trap "kill ${MONITOR_PID} 2>/dev/null || true" EXIT
# ============================================================================
# Run in parallel using background jobs
# ============================================================================
RUNNING=0
DONE=0
FAILED=0
PIDS_RUNNING=() # bash PIDs of background jobs
PIDS_PROBLEM=() # problem IDs corresponding to background jobs

# Reap finished background jobs: keep only live jobs in PIDS_RUNNING /
# PIDS_PROBLEM and bump the DONE/FAILED counters for the rest.
# `wait` runs as an `if` condition so a failed job is *counted* — the old
# `wait PID; if [ $? -eq 0 ]` pattern aborted the whole script under
# `set -e` as soon as the first job exited non-zero.
reap_finished_jobs() {
  local i alive=0
  local -a live_pids=() live_problems=()
  for i in "${!PIDS_RUNNING[@]}"; do
    if kill -0 "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      alive=$((alive + 1))
      live_pids+=("${PIDS_RUNNING[$i]}")
      live_problems+=("${PIDS_PROBLEM[$i]}")
    elif wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      DONE=$((DONE + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  done
  RUNNING=${alive}
  # `${arr[@]+...}` guards the empty-array case under `set -u` (bash < 4.4).
  PIDS_RUNNING=("${live_pids[@]+"${live_pids[@]}"}")
  PIDS_PROBLEM=("${live_problems[@]+"${live_problems[@]}"}")
}

for PID in "${PIDS[@]+"${PIDS[@]}"}"; do
  # Wait if we've hit the concurrency limit.  A short poll replaces the old
  # `wait -n`: `wait -n` consumes one job's stored exit status, so the later
  # per-PID `wait` in the reaper returned 127 and misclassified that job as
  # FAILED.  One-second granularity is negligible for hour-long experiments.
  while [ "${RUNNING}" -ge "${CONCURRENCY}" ]; do
    sleep 1
    reap_finished_jobs
  done
  # Launch new job.
  echo "START problem ${PID} [running: ${RUNNING}, done: ${DONE}, failed: ${FAILED}]"
  run_problem "${PID}" &
  PIDS_RUNNING+=("$!")
  PIDS_PROBLEM+=("${PID}")
  RUNNING=$((RUNNING + 1))
done
# Wait for remaining jobs
echo ""
echo "Waiting for remaining ${RUNNING} jobs..."
# Reap every job still tracked, recording its outcome in DONE/FAILED.
# `wait` runs as an `if` condition: the old `wait PID; if [ $? -eq 0 ]`
# pattern killed the script under `set -e` when a job exited non-zero,
# so the summary below was never printed after any failure.
collect_remaining() {
  local i
  for i in "${!PIDS_RUNNING[@]}"; do
    if wait "${PIDS_RUNNING[$i]}" 2>/dev/null; then
      DONE=$((DONE + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  done
}
collect_remaining
# Final tally for the whole batch.
# Unquoted delimiter: ${...} expansions inside the here-doc are evaluated.
cat <<SUMMARY

========================================
Parallel batch complete
 Succeeded: ${DONE}
 Failed: ${FAILED}
 Total: ${#PIDS[@]}
 Logs: ${LOG_DIR}/
========================================
SUMMARY