distill-pipeline / scripts /run_instruct_continuous.sh
htaf's picture
added new instruct pipeline for faster generation
2739b3a
#!/usr/bin/env bash
set -euo pipefail

# Continuous instruct-only pipeline runner.
# - Uses separate cache/output to avoid mixing with thinking pipeline
# - Random-walk over chunks
# - No limit: processes all available chunks/questions; loop restarts after completion
#
# Required env:
#   INSTRUCT_GENERATOR_MODEL     instruct model to generate with
# Optional env (defaults applied below):
#   INSTRUCT_GENERATOR_PROVIDER  falls back to GENERATOR_PROVIDER, then "ollama"
#   PIPELINE_CACHE_DIR           default: <root>/data/cache_instruct
#   QUESTION_MAX_PER_CHUNK       default: 5
#   INSTRUCT_OUT                 default: <root>/gold/pipeline_gold_instruct.jsonl
#
# Stop with Ctrl+C. Because of `set -e`, a run that exits non-zero also ends
# the loop instead of being retried.

# Repository root: the parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Load .env if present. `set -a` auto-exports every variable the file assigns
# so the values reach the npm child process as well.
if [[ -f "$ROOT_DIR/.env" ]]; then
  set -a
  # shellcheck source=/dev/null
  source "$ROOT_DIR/.env"
  set +a
fi

if [[ -z "${INSTRUCT_GENERATOR_MODEL:-}" ]]; then
  echo "❌ Please set INSTRUCT_GENERATOR_MODEL to your instruct model." >&2
  exit 1
fi

# Run npm from the repository root so the script works no matter which
# directory it was launched from (npm resolves package.json from the cwd).
cd "$ROOT_DIR"

while true; do
  # The assignments below are scoped to this single npm invocation.
  INSTRUCT_PIPELINE=1 \
  INSTRUCT_GENERATOR_MODEL="$INSTRUCT_GENERATOR_MODEL" \
  INSTRUCT_GENERATOR_PROVIDER="${INSTRUCT_GENERATOR_PROVIDER:-${GENERATOR_PROVIDER:-ollama}}" \
  PIPELINE_CACHE_DIR="${PIPELINE_CACHE_DIR:-$ROOT_DIR/data/cache_instruct}" \
  PIPELINE_SEED_MODE=question-first \
  PIPELINE_RANDOM_WALK=1 \
  QUESTION_MAX_PER_CHUNK="${QUESTION_MAX_PER_CHUNK:-5}" \
  npm run pipeline -- \
    --out "${INSTRUCT_OUT:-$ROOT_DIR/gold/pipeline_gold_instruct.jsonl}" \
    --verbose

  echo "Instruct run finished at $(date). Sleeping 10s before next loop..."
  sleep 10
done