#!/bin/bash
#SBATCH --exclude=nid005159
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=256G
#SBATCH -p small-g
#SBATCH -t 2-0:00:00
#SBATCH --gpus-per-node=mi250:1
#SBATCH --exclusive=user
#SBATCH --hint=nomultithread
#SBATCH --account=project_462000119
#SBATCH -o logs/%j.out
#SBATCH -e logs/%j.err

# If run without sbatch, submit this script as a batch job and exit
if [ -z "${SLURM_JOB_ID:-}" ]; then
    mkdir -p logs
    sbatch "$0"
    exit
fi

set -euo pipefail

# Symlink logs/latest_eval.out and logs/latest_eval.err to this job's logs
ln -f -s $SLURM_JOB_ID.out logs/latest_eval.out
ln -f -s $SLURM_JOB_ID.err logs/latest_eval.err

# Data
# Checkpoint selection: later assignments override earlier ones, so only the
# last uncommented CHECKPOINT_PATH/VARIANT pair below takes effect.
#CHECKPOINT_PATH=/scratch/project_462000119/muennighoff/nov-2022-optimization/checkpoints/global_step10
#VARIANT=global_step10

CHECKPOINT_PATH=lm1-220m/global_step14324
VARIANT=lm1-220m

CHECKPOINT_PATH=lm1-220m-7b5-oscar/global_step14324
VARIANT=lm1-220m-7b5-oscar

#CHECKPOINT_PATH=lm1-280m/global_step11269
#VARIANT=lm1-280m-5b9

#CHECKPOINT_PATH=lm1-280m-5b9-oscar/global_step11269
#VARIANT=lm1-280m-5b9-oscar

CHECKPOINT_PATH=lm1-1b1-21b-oscar/global_step39672
VARIANT=lm1-1b1-21b-oscar

#CHECKPOINT_PATH=lm1-1b1-21b/global_step39672
#VARIANT=lm1-1b1-21b

#CHECKPOINT_PATH=lm1-2b8-55b-oscar/global_step52452
#VARIANT=lm1-2b8-55b-oscar

#CHECKPOINT_PATH=lm1-2b8-55b/global_step52452
#VARIANT=lm1-2b8-55b

#CHECKPOINT_PATH=lm1-2b8-55b-oscar/global_step52452
#VARIANT=lm1-2b8-55b-oscar

#CHECKPOINT_PATH=lm1-3b9-77b/global_step73814
#VARIANT=lm1-3b9-77b

#CHECKPOINT_PATH=lm1-1b1-21b-c4/global_step39672
#VARIANT=lm1-1b1-21b-c4

# tensorboard_2b855b11bc4 tensorboard_2b855b14bc4 tensorboard_2b855b18bc4 tensorboard_2b855b28bc4 tensorboard_2b855b9bc4
#2b855b50c4py 2b855b60c4py 2b855b70c4py 2b855b80c4py 2b855b90c4py
VARIANT=2b855b70c4py
CHECKPOINT_PATH=lm1-2b8-55b-c4py/$VARIANT/global_step52452
#2b855b11bc4 2b855b14bc4 2b855b18bc4 2b855b28bc4 2b855b9bc4

#VARIANT=2b855b9boscar
#CHECKPOINT_PATH=lm1-2b8-55b-oscar-repetitions/$VARIANT/global_step52452

#VARIANT=realtasky
#CHECKPOINT_PATH=checkpoints_2b855brealtasky/global_step52452

#CHECKPOINT_PATH=lm1-2b8-55b-c4-repetitions/2b855b55bc4/global_step52452
CHECKPOINT_PATH=checkpoints_2b855b55bc4ul2valfast/global_step52452
VARIANT=ul2valfast

CHECKPOINT_PATH=lm2-2b8-55b-c4-new/global_step52452
VARIANT=ul2new

export HF_DATASETS_OFFLINE=1
export HF_DATASETS_CACHE=/scratch/project_462000119/ds_cache

VOCAB_FILE="gpt2/vocab.json"
MERGE_FILE="gpt2/merges.txt"

PP_SIZE=1
TP_SIZE=1

# Different from the training MICRO_BATCH_SIZE: there is no optimizer memory at
# eval time, so a bigger batch size fits. Make it as big as fits on the GPU
# without OOM, but not too close to 100% memory use.
EVAL_MICRO_BATCH_SIZE=1
MICRO_BS_MULTIPLIER=1

# Model parameters
SEQ_LEN=2048

# Dummy arguments; the actual model configuration is read from the checkpoint
MEGATRON_REQUIRED_ARGS=" \
    --num-layers -1 \
    --hidden-size -1 \
    --num-attention-heads -1 \
    --seq-length -1 \
    --max-position-embeddings -1 \
"

ZERO_STAGE=0

# Write a per-job DeepSpeed config
mkdir -p ds_configs
DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json"

cat <<EOF > $DS_CONFIG_PATH
{
    "train_micro_batch_size_per_gpu": 1,
    "train_batch_size": 1,
    "gradient_clipping": 1.0,
    "zero_optimization": {
        "stage": $ZERO_STAGE
    },
    "bf16": {
        "enabled": true
    },
    "steps_per_print": 2000,
    "wall_clock_breakdown": false
}
EOF

DEEPSPEED_ARGS=" \
    --deepspeed \
    --deepspeed_config $DS_CONFIG_PATH \
    --zero-stage $ZERO_STAGE \
    "

CMD="Megatron-DeepSpeed/tasks/eval_harness/evaluate.py \
    --load $CHECKPOINT_PATH \
    --results_path $VARIANT-results.json \
    --tensor-model-parallel-size $TP_SIZE \
    --pipeline-model-parallel-size $PP_SIZE \
    --vocab-file $VOCAB_FILE \
    --merge-file $MERGE_FILE \
    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
    --no-load-optim \
    --no-load-rng \
    --bf16 \
    --inference \
    --seq-length $SEQ_LEN \
    --task_list anli_r1,anli_r2,anli_r3,cb,copa,hellaswag,rte,winogrande,storycloze_2016,boolq,arc_easy,arc_challenge,sciq,piqa \
    --intermed_results \
    --adaptive_seq_len \
    --add_denoiser \
    --micro_bs_multiplier $MICRO_BS_MULTIPLIER \
    $MEGATRON_REQUIRED_ARGS \
    $DEEPSPEED_ARGS \
    "

echo $CMD

echo "START $SLURM_JOBID: $(date)"

srun --label launch.sh $CMD

echo "END $SLURM_JOBID: $(date)"
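
# NOTE: launch.sh used in the srun line above is not defined in this script.
# It is assumed to be a small wrapper that prepares the Python environment on
# the compute node and then runs the command it receives. A minimal,
# hypothetical sketch (the activation path and wrapper contents are
# assumptions, not the actual file):
#
#   #!/bin/bash
#   # launch.sh -- hypothetical wrapper: activate the environment, then exec
#   # the command passed by srun (here: evaluate.py plus all its arguments)
#   source /path/to/venv/bin/activate   # assumption: env with Megatron-DeepSpeed deps
#   exec python -u "$@"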