oracle / pre_cache.sh
zirobtc's picture
Upload folder using huggingface_hub
d195287 verified
# ----------------------------------------------------------------------
# Fixed settings for the cache run. Each value below is printed in the
# startup banner and handed to scripts/cache_dataset.py as a flag.
# ----------------------------------------------------------------------

# Destination directory for the cache (--output_dir).
OUTPUT_DIR=data/cache

# Thresholds.
CONTEXT_LENGTH=4096   # --context_length (banner: "H/B/H threshold")
MIN_TRADES=10         # --min_trades (banner: "T_cutoff threshold")

# Sampling volume and parallelism.
SAMPLES_PER_TOKEN=1            # --samples_per_token
TARGET_CONTEXTS_PER_CLASS=10   # --target_contexts_per_class
NUM_WORKERS=1                  # --num_workers

# Label horizons, in seconds, measured from each sampled T_cutoff.
# Chosen for the memecoin timing distribution: less micro-noise, more
# actionable windows.
HORIZONS_SECONDS=(
  300    # 5m
  900    # 15m
  1800   # 30m
  3600   # 60m
  7200   # 120m
)

# Quantile levels passed via --quantiles.
QUANTILES=(
  0.1
  0.5
  0.9
)
# Startup banner: show the effective settings on stdout before any work
# begins. One printf call emits every line; each argument becomes one
# output line. Arrays are joined with spaces via [*] for display.
printf '%s\n' \
  "========================================" \
  "Apollo Dataset Pre-Caching (Context Mode)" \
  "========================================" \
  "Context Length (H/B/H threshold): $CONTEXT_LENGTH" \
  "Min Trades (T_cutoff threshold): $MIN_TRADES" \
  "Samples per Token: $SAMPLES_PER_TOKEN" \
  "Target Contexts per Class: $TARGET_CONTEXTS_PER_CLASS" \
  "Num Workers: $NUM_WORKERS" \
  "Horizons (sec): ${HORIZONS_SECONDS[*]}" \
  "Quantiles: ${QUANTILES[*]}" \
  "Output Directory: $OUTPUT_DIR" \
  "========================================" \
  "Starting dataset caching..."
# Make sure the cache directory exists before the caching script runs.
# Abort early if it cannot be created instead of letting the run continue.
if ! mkdir -p "$OUTPUT_DIR"; then
  echo "Error: could not create output directory: $OUTPUT_DIR" >&2
  exit 1
fi

# Run the caching script with the configured settings. Array settings are
# expanded with [@] so each element is passed as its own argument. Any
# arguments given to this script ("$@") are appended after the defaults.
python3 scripts/cache_dataset.py \
  --output_dir "$OUTPUT_DIR" \
  --context_length "$CONTEXT_LENGTH" \
  --min_trades "$MIN_TRADES" \
  --samples_per_token "$SAMPLES_PER_TOKEN" \
  --target_contexts_per_class "$TARGET_CONTEXTS_PER_CLASS" \
  --num_workers "$NUM_WORKERS" \
  --horizons_seconds "${HORIZONS_SECONDS[@]}" \
  --quantiles "${QUANTILES[@]}" \
  "$@"
cache_rc=$?

# Only report success when the caching script actually succeeded; otherwise
# surface the failure on stderr and propagate its exit code to the caller.
if [ "$cache_rc" -ne 0 ]; then
  echo "Error: dataset caching failed (exit code $cache_rc)" >&2
  exit "$cache_rc"
fi

echo "Done!"
echo "Cache saved to: $OUTPUT_DIR"