diff --git "a/8b7178b13b/3583606.out" "b/8b7178b13b/3583606.out" new file mode 100644--- /dev/null +++ "b/8b7178b13b/3583606.out" @@ -0,0 +1,14937 @@ +Model parameters: d_model 4096 ffw_size 16384 kv_size 128 n_heads 32 n_layers 42 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 4 --pipeline-model-parallel-size 4 --num-layers 42 --hidden-size 4096 --num-attention-heads 32 --kv-channels 128 --ffn-hidden-size 16384 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 1 --global-batch-size 512 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-8b7178b13bval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --override-lr-scheduler --reset-progress --no-load-optim --log-interval 10 --save-interval 5000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_8b7178b13bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save lm1-8b7-178b-c4-repetitions/8b7178b13b --load lm1-8b7-178b-c4-repetitions/8b7178b13b --train-weighted-split-paths-path train400m.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --num-workers 0 --valid-num-workers 0 --deepspeed --deepspeed_config ds_configs/3583606.json --zero-stage 0 +START 3583606: Thu 25 May 2023 01:34:18 PM EEST + 0: + 0: + 0: ======================= ROCm System Management Interface ======================= + 0: ================================= Concise Info ================================= + 0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 0: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 2 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 4 44.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: 6 35.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 0: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 0: ================================================================================ + 0: ============================= End of ROCm SMI Log ============================== +12: +12: +12: ======================= ROCm System Management Interface ======================= +12: ================================= Concise Info ================================= +12: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +12: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 2 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 4 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: 6 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +12: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +12: ================================================================================ +12: ============================= End of ROCm SMI Log ============================== + 4: + 4: + 4: ======================= ROCm System Management Interface ======================= + 4: ================================= Concise Info ================================= + 4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 4: 0 41.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 2 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 4 39.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: 6 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 4: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 4: ================================================================================ + 4: ============================= End of ROCm SMI Log ============================== +13: +13: +13: ======================= ROCm System Management Interface ======================= +13: ================================= Concise Info ================================= +13: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +13: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 2 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 4 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: 6 39.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +13: 7 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +13: ================================================================================ +13: ============================= End of ROCm SMI Log ============================== +16: +16: +16: ======================= ROCm System Management Interface ======================= +16: ================================= Concise Info ================================= +16: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +16: 0 41.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 2 38.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 4 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: 6 35.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +16: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +16: ================================================================================ +16: ============================= End of ROCm SMI Log ============================== + 8: + 8: + 8: ======================= ROCm System Management Interface ======================= + 8: ================================= Concise Info ================================= + 8: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 8: 0 46.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 2 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 4 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: 6 36.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 8: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 8: ================================================================================ + 8: ============================= End of ROCm SMI Log ============================== +18: +18: +18: ======================= ROCm System Management Interface ======================= +18: ================================= Concise Info ================================= +18: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +18: 0 48.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 2 42.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 4 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: 6 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +18: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +18: ================================================================================ +18: ============================= End of ROCm SMI Log ============================== +26: +26: +26: ======================= ROCm System Management Interface ======================= +26: ================================= Concise Info ================================= +26: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +26: 0 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 2 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 4 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: 6 36.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +26: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +26: ================================================================================ +26: ============================= End of ROCm SMI Log ============================== +19: +19: +19: ======================= ROCm System Management Interface ======================= +19: ================================= Concise Info ================================= +19: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +19: 0 45.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 2 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 4 41.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: 6 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +19: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +19: ================================================================================ +19: ============================= End of ROCm SMI Log ============================== +31: +31: +31: ======================= ROCm System Management Interface ======================= +31: ================================= Concise Info ================================= +31: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +31: 0 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 2 39.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 4 43.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: 6 32.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +31: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +31: ================================================================================ +31: ============================= End of ROCm SMI Log ============================== +20: +20: +20: ======================= ROCm System Management Interface ======================= +20: ================================= Concise Info ================================= +20: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +20: 0 43.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 2 40.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 4 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: 6 35.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +20: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +20: ================================================================================ +20: ============================= End of ROCm SMI Log ============================== +27: +27: +27: ======================= ROCm System Management Interface ======================= +27: ================================= Concise Info ================================= +27: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +27: 0 42.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 2 38.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 4 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: 6 36.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +27: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +27: ================================================================================ +27: ============================= End of ROCm SMI Log ============================== + 1: + 1: + 1: ======================= ROCm System Management Interface ======================= + 1: ================================= Concise Info ================================= + 1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 1: 0 47.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 2 44.0c 80.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 4 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: 6 45.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 1: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 1: ================================================================================ + 1: ============================= End of ROCm SMI Log ============================== + 6: + 6: + 6: ======================= ROCm System Management Interface ======================= + 6: ================================= Concise Info ================================= + 6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 6: 0 47.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 2 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 4 43.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: 6 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 6: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 6: ================================================================================ + 6: ============================= End of ROCm SMI Log ============================== +15: +15: +15: ======================= ROCm System Management Interface ======================= +15: ================================= Concise Info ================================= +15: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +15: 0 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 2 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 4 40.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: 6 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +15: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +15: ================================================================================ +15: ============================= End of ROCm SMI Log ============================== +10: +10: +10: ======================= ROCm System Management Interface ======================= +10: ================================= Concise Info ================================= +10: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +10: 0 46.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 2 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: 6 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +10: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +10: ================================================================================ +10: ============================= End of ROCm SMI Log ============================== +11: +11: +11: ======================= ROCm System Management Interface ======================= +11: ================================= Concise Info ================================= +11: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +11: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 2 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 4 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: 6 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +11: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +11: ================================================================================ +11: ============================= End of ROCm SMI Log ============================== + 9: + 9: + 9: ======================= ROCm System Management Interface ======================= + 9: ================================= Concise Info ================================= + 9: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 9: 0 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 2 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 4 39.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 5 54.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: 6 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 9: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 9: ================================================================================ + 9: ============================= End of ROCm SMI Log ============================== + 5: + 5: + 5: ======================= ROCm System Management Interface ======================= + 5: ================================= Concise Info ================================= + 5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 5: 0 50.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 2 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 4 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: 6 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 5: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 5: ================================================================================ + 5: ============================= End of ROCm SMI Log ============================== +17: +17: +17: ======================= ROCm System Management Interface ======================= +17: ================================= Concise Info ================================= +17: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +17: 0 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 2 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 4 45.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: 6 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +17: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +17: ================================================================================ +17: ============================= End of ROCm SMI Log ============================== +24: +24: +24: ======================= ROCm System Management Interface ======================= +24: ================================= Concise Info ================================= +24: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +24: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 2 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 4 51.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: 6 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +24: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +24: ================================================================================ +24: ============================= End of ROCm SMI Log ============================== +14: +14: +14: ======================= ROCm System Management Interface ======================= +14: ================================= Concise Info ================================= +14: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +14: 0 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 2 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 4 44.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: 6 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +14: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +14: ================================================================================ +14: ============================= End of ROCm SMI Log ============================== +28: +28: +28: ======================= ROCm System Management Interface ======================= +28: ================================= Concise Info ================================= +28: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +28: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 2 41.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 4 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: 6 36.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +28: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +28: ================================================================================ +28: ============================= End of ROCm SMI Log ============================== +25: +25: +25: ======================= ROCm System Management Interface ======================= +25: ================================= Concise Info ================================= +25: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +25: 0 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 2 37.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: 6 40.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +25: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +25: ================================================================================ +25: ============================= End of ROCm SMI Log ============================== + 2: + 2: + 2: ======================= ROCm System Management Interface ======================= + 2: ================================= Concise Info ================================= + 2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 2: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 2 46.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 4 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: 6 47.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 2: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 2: ================================================================================ + 2: ============================= End of ROCm SMI Log ============================== +29: +29: +29: ======================= ROCm System Management Interface ======================= +29: ================================= Concise Info ================================= +29: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +29: 0 44.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 2 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 4 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: 6 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +29: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +29: ================================================================================ +29: ============================= End of ROCm SMI Log ============================== +23: +23: +23: ======================= ROCm System Management Interface ======================= +23: ================================= Concise Info ================================= +23: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +23: 0 45.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 2 42.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 4 36.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: 6 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +23: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +23: ================================================================================ +23: ============================= End of ROCm SMI Log ============================== +21: +21: +21: ======================= ROCm System Management Interface ======================= +21: ================================= Concise Info ================================= +21: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +21: 0 41.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 2 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 3 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 4 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: 6 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +21: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +21: ================================================================================ +21: ============================= End of ROCm SMI Log ============================== + 7: + 7: + 7: ======================= ROCm System Management Interface ======================= + 7: ================================= Concise Info ================================= + 7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 7: 0 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 2 40.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 4 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: 6 35.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 7: 7 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 7: ================================================================================ + 7: ============================= End of ROCm SMI Log ============================== + 3: + 3: + 3: ======================= ROCm System Management Interface ======================= + 3: ================================= Concise Info ================================= + 3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% + 3: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 2 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 4 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: 6 39.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% + 3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% + 3: ================================================================================ + 3: ============================= End of ROCm SMI Log ============================== +30: +30: +30: ======================= ROCm System Management Interface ======================= +30: ================================= Concise Info ================================= +30: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +30: 0 51.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 2 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 4 48.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: 6 38.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +30: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +30: ================================================================================ +30: ============================= End of ROCm SMI Log ============================== +22: +22: +22: ======================= ROCm System Management Interface ======================= +22: ================================= Concise Info ================================= +22: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +22: 0 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 3 37.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 4 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: 6 43.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +22: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +22: ================================================================================ +22: ============================= End of ROCm SMI Log ============================== +31: Launching on nid006582 (31/32), master nid006551 port 9999, GPUs 8, CUDA: True + 6: Launching on nid006557 (6/32), master nid006551 port 9999, GPUs 8, CUDA: True + 9: Launching on nid006560 (9/32), master nid006551 port 9999, GPUs 8, CUDA: True + 2: Launching on nid006553 (2/32), master nid006551 port 9999, GPUs 8, CUDA: True +15: Launching on nid006566 (15/32), master nid006551 port 9999, GPUs 8, CUDA: True +20: Launching on nid006571 (20/32), master nid006551 port 9999, GPUs 8, CUDA: True +17: Launching on nid006568 (17/32), master nid006551 port 9999, GPUs 8, CUDA: True + 5: Launching on nid006556 (5/32), master nid006551 port 9999, GPUs 8, CUDA: True +12: Launching on nid006563 (12/32), master nid006551 port 9999, GPUs 8, CUDA: True +13: Launching on nid006564 (13/32), master nid006551 port 9999, GPUs 8, CUDA: True +11: Launching on nid006562 (11/32), master nid006551 port 9999, GPUs 8, CUDA: True +21: Launching on nid006572 (21/32), master nid006551 port 9999, GPUs 8, CUDA: True + 0: Launching on nid006551 (0/32), master nid006551 port 9999, GPUs 8, CUDA: True +10: Launching on nid006561 (10/32), master nid006551 port 9999, GPUs 8, CUDA: True + 1: Launching on nid006552 (1/32), master nid006551 port 9999, GPUs 8, CUDA: True +18: Launching on nid006569 (18/32), master nid006551 port 9999, GPUs 8, CUDA: True + 4: Launching on nid006555 (4/32), master nid006551 port 9999, GPUs 8, CUDA: True +28: Launching on nid006579 (28/32), master nid006551 port 9999, GPUs 8, CUDA: True + 8: Launching on nid006559 (8/32), master nid006551 port 9999, GPUs 8, CUDA: True +24: Launching on nid006575 (24/32), master nid006551 port 9999, GPUs 8, CUDA: True +25: Launching on nid006576 (25/32), master nid006551 port 9999, GPUs 8, CUDA: True +27: Launching on nid006578 (27/32), master nid006551 port 9999, GPUs 8, CUDA: True +16: Launching on nid006567 (16/32), master nid006551 port 9999, GPUs 8, CUDA: True +26: Launching on nid006577 (26/32), master nid006551 port 9999, GPUs 8, CUDA: True +19: Launching on nid006570 (19/32), master nid006551 port 9999, GPUs 8, CUDA: True +23: Launching on nid006574 (23/32), master nid006551 port 9999, GPUs 8, CUDA: True +30: Launching on nid006581 (30/32), master nid006551 port 9999, GPUs 8, CUDA: True +14: Launching on nid006565 (14/32), master nid006551 port 9999, GPUs 8, CUDA: True +22: Launching on nid006573 (22/32), master nid006551 port 9999, GPUs 8, CUDA: True +29: Launching on nid006580 (29/32), master nid006551 port 9999, GPUs 8, CUDA: True + 7: Launching on nid006558 (7/32), master nid006551 port 9999, GPUs 8, CUDA: True + 3: Launching on nid006554 (3/32), master nid006551 port 9999, GPUs 8, CUDA: True + 0: using world size: 256, data-parallel-size: 16, tensor-model-parallel size: 4, pipeline-model-parallel size: 4 + 0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. + 0: using torch.bfloat16 for parameters ... + 0: ------------------------ arguments ------------------------ + 0: abort_on_unmet_fused_kernel_constraints ......... False + 0: accumulate_allreduce_grads_in_fp32 .............. True + 0: adam_beta1 ...................................... 0.9 + 0: adam_beta2 ...................................... 0.999 + 0: adam_eps ........................................ 1e-08 + 0: adlr_autoresume ................................. False + 0: adlr_autoresume_interval ........................ 1000 + 0: apply_query_key_layer_scaling ................... True + 0: apply_residual_connection_post_layernorm ........ False + 0: attention_dropout ............................... 0.1 + 0: attention_softmax_in_fp32 ....................... False + 0: bert_binary_head ................................ True + 0: bert_load ....................................... None + 0: bf16 ............................................ True + 0: bias_dropout_fusion ............................. True + 0: bias_gelu_fusion ................................ True + 0: biencoder_projection_dim ........................ 0 + 0: biencoder_shared_query_context_model ............ False + 0: block_data_path ................................. None + 0: checkpoint_activations .......................... False + 0: checkpoint_in_cpu ............................... False + 0: checkpoint_num_layers ........................... 1 + 0: clip_grad ....................................... 1.0 + 0: codecarbon_dir .................................. None + 0: consumed_train_samples .......................... 0 + 0: consumed_train_tokens ........................... 0 + 0: consumed_valid_samples .......................... 0 + 0: contigious_checkpointing ........................ False + 0: cpu_optimizer ................................... False + 0: cpu_torch_adam .................................. False + 0: curriculum_learning ............................. False + 0: data_impl ....................................... mmap + 0: data_parallel_size .............................. 16 + 0: data_path ....................................... None + 0: dataloader_type ................................. single + 0: DDP_impl ........................................ local + 0: decoder_seq_length .............................. None + 0: deepscale ....................................... False + 0: deepscale_config ................................ None + 0: deepspeed ....................................... True + 0: deepspeed_activation_checkpointing .............. False + 0: deepspeed_config ................................ ds_configs/3583606.json + 0: deepspeed_mpi ................................... False + 0: distribute_checkpointed_activations ............. False + 0: distributed_backend ............................. nccl + 0: embed_layernorm ................................. False + 0: embedding_path .................................. None + 0: encoder_seq_length .............................. 2048 + 0: eod_mask_loss ................................... False + 0: eval_interval ................................... 1 + 0: eval_iters ...................................... 100 + 0: eval_only ....................................... True + 0: evidence_data_path .............................. None + 0: exit_duration_in_mins ........................... None + 0: exit_interval ................................... None + 0: ffn_hidden_size ................................. 16384 + 0: finetune ........................................ False + 0: fp16 ............................................ False + 0: fp16_lm_cross_entropy ........................... False + 0: fp32_residual_connection ........................ False + 0: gigaflos_no_embeds .............................. 0 + 0: global_batch_size ............................... 512 + 0: glu_activation .................................. None + 0: hidden_dropout .................................. 0.1 + 0: hidden_size ..................................... 4096 + 0: hysteresis ...................................... 2 + 0: ict_head_size ................................... None + 0: ict_load ........................................ None + 0: img_dim ......................................... 224 + 0: indexer_batch_size .............................. 128 + 0: indexer_log_interval ............................ 1000 + 0: inference ....................................... False + 0: init_method_std ................................. 0.02 + 0: init_method_xavier_uniform ...................... False + 0: initial_loss_scale .............................. 4294967296 + 0: kill_switch_path ................................ kill-switch-8b7178b13bval + 0: kv_channels ..................................... 128 + 0: layer_norm_fusion ............................... True + 0: layernorm_epsilon ............................... 1e-05 + 0: lazy_mpu_init ................................... None + 0: load ............................................ lm1-8b7-178b-c4-repetitions/8b7178b13b + 0: local_rank ...................................... None + 0: log_batch_size_to_tensorboard ................... True + 0: log_interval .................................... 10 + 0: log_learning_rate_to_tensorboard ................ True + 0: log_level ....................................... None + 0: log_level_replica ............................... None + 0: log_loss_scale_to_tensorboard ................... True + 0: log_num_zeros_in_grad ........................... False + 0: log_params_norm ................................. False + 0: log_path ........................................ None + 0: log_timers_to_tensorboard ....................... True + 0: log_validation_ppl_to_tensorboard ............... True + 0: loss_on_targets_only ............................ False + 0: loss_scale ...................................... None + 0: loss_scale_window ............................... 1000 + 0: lr .............................................. 0.0002 + 0: lr_decay_iters .................................. None + 0: lr_decay_samples ................................ 1 + 0: lr_decay_style .................................. cosine + 0: lr_decay_tokens ................................. None + 0: lr_warmup_fraction .............................. None + 0: lr_warmup_iters ................................. 0 + 0: lr_warmup_samples ............................... 0 + 0: make_vocab_size_divisible_by .................... 128 + 0: mask_prob ....................................... 0.15 + 0: masked_softmax_fusion ........................... True + 0: max_position_embeddings ......................... 2048 + 0: mean_noise_span_length .......................... None + 0: memory_centric_tiled_linear ..................... False + 0: merge_file ...................................... gpt2/merges.txt + 0: micro_batch_size ................................ 1 + 0: min_loss_scale .................................. 1.0 + 0: min_lr .......................................... 2e-05 + 0: mmap_warmup ..................................... False + 0: no_load_optim ................................... True + 0: no_load_rng ..................................... None + 0: no_save_optim ................................... None + 0: no_save_rng ..................................... None + 0: noise_density ................................... None + 0: num_attention_heads ............................. 32 + 0: num_channels .................................... 3 + 0: num_classes ..................................... 1000 + 0: num_layers ...................................... 42 + 0: num_layers_per_virtual_pipeline_stage ........... None + 0: num_workers ..................................... 0 + 0: onnx_safe ....................................... None + 0: openai_gelu ..................................... False + 0: optimizer ....................................... adam + 0: optimizer_fusion ................................ True + 0: override_lr_scheduler ........................... True + 0: pad_vocab_size_to ............................... None + 0: params_dtype .................................... torch.bfloat16 + 0: partition_activations ........................... False + 0: patch_dim ....................................... 16 + 0: pipeline_model_parallel_size .................... 4 + 0: position_embedding_type ......................... PositionEmbeddingType.absolute + 0: pp_partition_method ............................. None + 0: profile_backward ................................ False + 0: query_in_block_prob ............................. 0.1 + 0: rampup_batch_size ............................... None + 0: rank ............................................ 0 + 0: remote_device ................................... none + 0: reset_attention_mask ............................ False + 0: reset_position_ids .............................. False + 0: reset_progress .................................. True + 0: retriever_report_topk_accuracies ................ [] + 0: retriever_score_scaling ......................... False + 0: retriever_seq_length ............................ 256 + 0: reweight_loss_based_on_position_frequency ....... False + 0: sample_rate ..................................... 1.0 + 0: save ............................................ lm1-8b7-178b-c4-repetitions/8b7178b13b + 0: save_interval ................................... 5000 + 0: scatter_gather_tensors_in_pipeline .............. True + 0: scattered_embeddings ............................ False + 0: seed ............................................ 1234 + 0: seq_length ...................................... 2048 + 0: sgd_momentum .................................... 0.9 + 0: short_seq_prob .................................. 0.1 + 0: skip_train_iteration_range ...................... None + 0: split ........................................... None + 0: split_transformers .............................. False + 0: sync_tp_duplicated_parameters ................... False + 0: synchronize_each_layer .......................... False + 0: tensor_model_parallel_size ...................... 4 + 0: tensorboard_dir ................................. tensorboard_8b7178b13bval + 0: tensorboard_log_interval ........................ 1 + 0: tensorboard_queue_size .......................... 5 + 0: test_weighted_split_paths ....................... None + 0: test_weighted_split_paths_path .................. None + 0: tile_factor ..................................... 1 + 0: titles_data_path ................................ None + 0: tokenizer_name_or_path .......................... None + 0: tokenizer_type .................................. GPT2BPETokenizer + 0: train_iters ..................................... None + 0: train_samples ................................... 1 + 0: train_tokens .................................... None + 0: train_weighted_split_names ...................... ['train'] + 0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document']] + 0: train_weighted_split_paths_path ................. None + 0: train_weighted_split_splits ..................... [['0:1']] + 0: train_weighted_split_weights .................... [['1.0']] + 0: universal_checkpoint ............................ False + 0: use_bnb_optimizer ............................... False + 0: use_checkpoint_lr_scheduler ..................... False + 0: use_contiguous_buffers_in_ddp ................... True + 0: use_cpu_initialization .......................... None + 0: use_one_sent_docs ............................... False + 0: use_pin_memory .................................. False + 0: valid_num_workers ............................... 0 + 0: valid_weighted_split_names ...................... ['validation'] + 0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] + 0: valid_weighted_split_paths_path ................. None + 0: valid_weighted_split_splits ..................... [['0:1']] + 0: valid_weighted_split_weights .................... [['1.0']] + 0: virtual_pipeline_model_parallel_size ............ None + 0: vocab_extra_ids ................................. 0 + 0: vocab_file ...................................... gpt2/vocab.json + 0: weight_decay .................................... 0.1 + 0: world_size ...................................... 256 + 0: zero_allgather_bucket_size ...................... 0.0 + 0: zero_contigious_gradients ....................... False + 0: zero_reduce_bucket_size ......................... 0.0 + 0: zero_reduce_scatter ............................. False + 0: zero_stage ...................................... 0 + 0: -------------------- end of arguments --------------------- + 0: setting number of micro-batches to constant 32 + 0: > building GPT2BPETokenizer tokenizer ... + 0: > padded vocab (size: 50257) with 431 dummy tokens (new size: 50688) + 0: DeepSpeed general environment info: + 0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] + 0: torch version .................... 1.13.0+rocm5.2 + 0: torch cuda version ............... None + 0: torch hip version ................ 5.2.21151-afdc89f8 + 0: nvcc version ..................... None + 0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] + 0: deepspeed info ................... 0.7.5, unknown, unknown + 0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 + 0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** + 0: > initializing torch distributed ... + 0: [2023-05-25 13:37:21,573] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +31: > setting tensorboard ... + 0: > initializing tensor model parallel with size 4 + 0: > initializing pipeline model parallel with size 4 + 0: > setting random seeds to 1234 ... + 0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 + 0: > compiling dataset index builder ... + 0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: make: Nothing to be done for 'default'. + 0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' + 0: >>> done with dataset index builder. Compilation time: 0.109 seconds + 0: > compiling and loading fused kernels ... + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 87 + 0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.o scaled_upper_triang_masked_softmax_hip.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/opt/rocm/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 63 + 0: ninja: no work to do. + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] + 0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] + 0: Total number of unsupported CUDA function calls: 0 + 0: + 0: + 0: Total number of replaced kernel launches: 67 + 0: [1/1] c++ layer_norm_hip_kernel.cuda.o layer_norm_cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/opt/rocm/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so + 0: >>> done with compiling and loading fused kernels. Compilation time: 27.208 seconds + 0: time to initialize megatron (seconds): -0.166 + 0: [after megatron is initialized] datetime: 2023-05-25 13:37:51 + 0: building GPT model ... + 0: [2023-05-25 13:37:51,985] [INFO] [utils.py:827:see_memory_usage] Before Building Model + 0: [2023-05-25 13:37:51,986] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB + 0: [2023-05-25 13:37:51,986] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 40.87 GB, percent = 8.1% + 0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None + 0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=0, model=1): 1, ProcessCoord(pipe=0, data=0, model=2): 2, ProcessCoord(pipe=0, data=0, model=3): 3, ProcessCoord(pipe=0, data=1, model=0): 4, ProcessCoord(pipe=0, data=1, model=1): 5, ProcessCoord(pipe=0, data=1, model=2): 6, ProcessCoord(pipe=0, data=1, model=3): 7, ProcessCoord(pipe=0, data=2, model=0): 8, ProcessCoord(pipe=0, data=2, model=1): 9, ProcessCoord(pipe=0, data=2, model=2): 10, ProcessCoord(pipe=0, data=2, model=3): 11, ProcessCoord(pipe=0, data=3, model=0): 12, ProcessCoord(pipe=0, data=3, model=1): 13, ProcessCoord(pipe=0, data=3, model=2): 14, ProcessCoord(pipe=0, data=3, model=3): 15, ProcessCoord(pipe=0, data=4, model=0): 16, ProcessCoord(pipe=0, data=4, model=1): 17, ProcessCoord(pipe=0, data=4, model=2): 18, ProcessCoord(pipe=0, data=4, model=3): 19, ProcessCoord(pipe=0, data=5, model=0): 20, ProcessCoord(pipe=0, data=5, model=1): 21, ProcessCoord(pipe=0, data=5, model=2): 22, ProcessCoord(pipe=0, data=5, + 0: model=3): 23, ProcessCoord(pipe=0, data=6, model=0): 24, ProcessCoord(pipe=0, data=6, model=1): 25, ProcessCoord(pipe=0, data=6, model=2): 26, ProcessCoord(pipe=0, data=6, model=3): 27, ProcessCoord(pipe=0, data=7, model=0): 28, ProcessCoord(pipe=0, data=7, model=1): 29, ProcessCoord(pipe=0, data=7, model=2): 30, ProcessCoord(pipe=0, data=7, model=3): 31, ProcessCoord(pipe=0, data=8, model=0): 32, ProcessCoord(pipe=0, data=8, model=1): 33, ProcessCoord(pipe=0, data=8, model=2): 34, ProcessCoord(pipe=0, data=8, model=3): 35, ProcessCoord(pipe=0, data=9, model=0): 36, ProcessCoord(pipe=0, data=9, model=1): 37, ProcessCoord(pipe=0, data=9, model=2): 38, ProcessCoord(pipe=0, data=9, model=3): 39, ProcessCoord(pipe=0, data=10, model=0): 40, ProcessCoord(pipe=0, data=10, model=1): 41, ProcessCoord(pipe=0, data=10, model=2): 42, ProcessCoord(pipe=0, data=10, model=3): 43, ProcessCoord(pipe=0, data=11, model=0): 44, ProcessCoord(pipe=0, data=11, model=1): 45, ProcessCoord(pipe=0, data=11, model=2): 46, ProcessCoord( + 0: pipe=0, data=11, model=3): 47, ProcessCoord(pipe=0, data=12, model=0): 48, ProcessCoord(pipe=0, data=12, model=1): 49, ProcessCoord(pipe=0, data=12, model=2): 50, ProcessCoord(pipe=0, data=12, model=3): 51, ProcessCoord(pipe=0, data=13, model=0): 52, ProcessCoord(pipe=0, data=13, model=1): 53, ProcessCoord(pipe=0, data=13, model=2): 54, ProcessCoord(pipe=0, data=13, model=3): 55, ProcessCoord(pipe=0, data=14, model=0): 56, ProcessCoord(pipe=0, data=14, model=1): 57, ProcessCoord(pipe=0, data=14, model=2): 58, ProcessCoord(pipe=0, data=14, model=3): 59, ProcessCoord(pipe=0, data=15, model=0): 60, ProcessCoord(pipe=0, data=15, model=1): 61, ProcessCoord(pipe=0, data=15, model=2): 62, ProcessCoord(pipe=0, data=15, model=3): 63, ProcessCoord(pipe=1, data=0, model=0): 64, ProcessCoord(pipe=1, data=0, model=1): 65, ProcessCoord(pipe=1, data=0, model=2): 66, ProcessCoord(pipe=1, data=0, model=3): 67, ProcessCoord(pipe=1, data=1, model=0): 68, ProcessCoord(pipe=1, data=1, model=1): 69, ProcessCoord(pipe=1, data=1, mo + 0: del=2): 70, ProcessCoord(pipe=1, data=1, model=3): 71, ProcessCoord(pipe=1, data=2, model=0): 72, ProcessCoord(pipe=1, data=2, model=1): 73, ProcessCoord(pipe=1, data=2, model=2): 74, ProcessCoord(pipe=1, data=2, model=3): 75, ProcessCoord(pipe=1, data=3, model=0): 76, ProcessCoord(pipe=1, data=3, model=1): 77, ProcessCoord(pipe=1, data=3, model=2): 78, ProcessCoord(pipe=1, data=3, model=3): 79, ProcessCoord(pipe=1, data=4, model=0): 80, ProcessCoord(pipe=1, data=4, model=1): 81, ProcessCoord(pipe=1, data=4, model=2): 82, ProcessCoord(pipe=1, data=4, model=3): 83, ProcessCoord(pipe=1, data=5, model=0): 84, ProcessCoord(pipe=1, data=5, model=1): 85, ProcessCoord(pipe=1, data=5, model=2): 86, ProcessCoord(pipe=1, data=5, model=3): 87, ProcessCoord(pipe=1, data=6, model=0): 88, ProcessCoord(pipe=1, data=6, model=1): 89, ProcessCoord(pipe=1, data=6, model=2): 90, ProcessCoord(pipe=1, data=6, model=3): 91, ProcessCoord(pipe=1, data=7, model=0): 92, ProcessCoord(pipe=1, data=7, model=1): 93, ProcessCoord(pipe=1, da + 0: ta=7, model=2): 94, ProcessCoord(pipe=1, data=7, model=3): 95, ProcessCoord(pipe=1, data=8, model=0): 96, ProcessCoord(pipe=1, data=8, model=1): 97, ProcessCoord(pipe=1, data=8, model=2): 98, ProcessCoord(pipe=1, data=8, model=3): 99, ProcessCoord(pipe=1, data=9, model=0): 100, ProcessCoord(pipe=1, data=9, model=1): 101, ProcessCoord(pipe=1, data=9, model=2): 102, ProcessCoord(pipe=1, data=9, model=3): 103, ProcessCoord(pipe=1, data=10, model=0): 104, ProcessCoord(pipe=1, data=10, model=1): 105, ProcessCoord(pipe=1, data=10, model=2): 106, ProcessCoord(pipe=1, data=10, model=3): 107, ProcessCoord(pipe=1, data=11, model=0): 108, ProcessCoord(pipe=1, data=11, model=1): 109, ProcessCoord(pipe=1, data=11, model=2): 110, ProcessCoord(pipe=1, data=11, model=3): 111, ProcessCoord(pipe=1, data=12, model=0): 112, ProcessCoord(pipe=1, data=12, model=1): 113, ProcessCoord(pipe=1, data=12, model=2): 114, ProcessCoord(pipe=1, data=12, model=3): 115, ProcessCoord(pipe=1, data=13, model=0): 116, ProcessCoord(pipe=1, data=13 + 0: , model=1): 117, ProcessCoord(pipe=1, data=13, model=2): 118, ProcessCoord(pipe=1, data=13, model=3): 119, ProcessCoord(pipe=1, data=14, model=0): 120, ProcessCoord(pipe=1, data=14, model=1): 121, ProcessCoord(pipe=1, data=14, model=2): 122, ProcessCoord(pipe=1, data=14, model=3): 123, ProcessCoord(pipe=1, data=15, model=0): 124, ProcessCoord(pipe=1, data=15, model=1): 125, ProcessCoord(pipe=1, data=15, model=2): 126, ProcessCoord(pipe=1, data=15, model=3): 127, ProcessCoord(pipe=2, data=0, model=0): 128, ProcessCoord(pipe=2, data=0, model=1): 129, ProcessCoord(pipe=2, data=0, model=2): 130, ProcessCoord(pipe=2, data=0, model=3): 131, ProcessCoord(pipe=2, data=1, model=0): 132, ProcessCoord(pipe=2, data=1, model=1): 133, ProcessCoord(pipe=2, data=1, model=2): 134, ProcessCoord(pipe=2, data=1, model=3): 135, ProcessCoord(pipe=2, data=2, model=0): 136, ProcessCoord(pipe=2, data=2, model=1): 137, ProcessCoord(pipe=2, data=2, model=2): 138, ProcessCoord(pipe=2, data=2, model=3): 139, ProcessCoord(pipe=2, data=3, + 0: model=0): 140, ProcessCoord(pipe=2, data=3, model=1): 141, ProcessCoord(pipe=2, data=3, model=2): 142, ProcessCoord(pipe=2, data=3, model=3): 143, ProcessCoord(pipe=2, data=4, model=0): 144, ProcessCoord(pipe=2, data=4, model=1): 145, ProcessCoord(pipe=2, data=4, model=2): 146, ProcessCoord(pipe=2, data=4, model=3): 147, ProcessCoord(pipe=2, data=5, model=0): 148, ProcessCoord(pipe=2, data=5, model=1): 149, ProcessCoord(pipe=2, data=5, model=2): 150, ProcessCoord(pipe=2, data=5, model=3): 151, ProcessCoord(pipe=2, data=6, model=0): 152, ProcessCoord(pipe=2, data=6, model=1): 153, ProcessCoord(pipe=2, data=6, model=2): 154, ProcessCoord(pipe=2, data=6, model=3): 155, ProcessCoord(pipe=2, data=7, model=0): 156, ProcessCoord(pipe=2, data=7, model=1): 157, ProcessCoord(pipe=2, data=7, model=2): 158, ProcessCoord(pipe=2, data=7, model=3): 159, ProcessCoord(pipe=2, data=8, model=0): 160, ProcessCoord(pipe=2, data=8, model=1): 161, ProcessCoord(pipe=2, data=8, model=2): 162, ProcessCoord(pipe=2, data=8, model=3): 16 + 0: 3, ProcessCoord(pipe=2, data=9, model=0): 164, ProcessCoord(pipe=2, data=9, model=1): 165, ProcessCoord(pipe=2, data=9, model=2): 166, ProcessCoord(pipe=2, data=9, model=3): 167, ProcessCoord(pipe=2, data=10, model=0): 168, ProcessCoord(pipe=2, data=10, model=1): 169, ProcessCoord(pipe=2, data=10, model=2): 170, ProcessCoord(pipe=2, data=10, model=3): 171, ProcessCoord(pipe=2, data=11, model=0): 172, ProcessCoord(pipe=2, data=11, model=1): 173, ProcessCoord(pipe=2, data=11, model=2): 174, ProcessCoord(pipe=2, data=11, model=3): 175, ProcessCoord(pipe=2, data=12, model=0): 176, ProcessCoord(pipe=2, data=12, model=1): 177, ProcessCoord(pipe=2, data=12, model=2): 178, ProcessCoord(pipe=2, data=12, model=3): 179, ProcessCoord(pipe=2, data=13, model=0): 180, ProcessCoord(pipe=2, data=13, model=1): 181, ProcessCoord(pipe=2, data=13, model=2): 182, ProcessCoord(pipe=2, data=13, model=3): 183, ProcessCoord(pipe=2, data=14, model=0): 184, ProcessCoord(pipe=2, data=14, model=1): 185, ProcessCoord(pipe=2, data=14, model + 0: =2): 186, ProcessCoord(pipe=2, data=14, model=3): 187, ProcessCoord(pipe=2, data=15, model=0): 188, ProcessCoord(pipe=2, data=15, model=1): 189, ProcessCoord(pipe=2, data=15, model=2): 190, ProcessCoord(pipe=2, data=15, model=3): 191, ProcessCoord(pipe=3, data=0, model=0): 192, ProcessCoord(pipe=3, data=0, model=1): 193, ProcessCoord(pipe=3, data=0, model=2): 194, ProcessCoord(pipe=3, data=0, model=3): 195, ProcessCoord(pipe=3, data=1, model=0): 196, ProcessCoord(pipe=3, data=1, model=1): 197, ProcessCoord(pipe=3, data=1, model=2): 198, ProcessCoord(pipe=3, data=1, model=3): 199, ProcessCoord(pipe=3, data=2, model=0): 200, ProcessCoord(pipe=3, data=2, model=1): 201, ProcessCoord(pipe=3, data=2, model=2): 202, ProcessCoord(pipe=3, data=2, model=3): 203, ProcessCoord(pipe=3, data=3, model=0): 204, ProcessCoord(pipe=3, data=3, model=1): 205, ProcessCoord(pipe=3, data=3, model=2): 206, ProcessCoord(pipe=3, data=3, model=3): 207, ProcessCoord(pipe=3, data=4, model=0): 208, ProcessCoord(pipe=3, data=4, model=1): 20 + 0: 9, ProcessCoord(pipe=3, data=4, model=2): 210, ProcessCoord(pipe=3, data=4, model=3): 211, ProcessCoord(pipe=3, data=5, model=0): 212, ProcessCoord(pipe=3, data=5, model=1): 213, ProcessCoord(pipe=3, data=5, model=2): 214, ProcessCoord(pipe=3, data=5, model=3): 215, ProcessCoord(pipe=3, data=6, model=0): 216, ProcessCoord(pipe=3, data=6, model=1): 217, ProcessCoord(pipe=3, data=6, model=2): 218, ProcessCoord(pipe=3, data=6, model=3): 219, ProcessCoord(pipe=3, data=7, model=0): 220, ProcessCoord(pipe=3, data=7, model=1): 221, ProcessCoord(pipe=3, data=7, model=2): 222, ProcessCoord(pipe=3, data=7, model=3): 223, ProcessCoord(pipe=3, data=8, model=0): 224, ProcessCoord(pipe=3, data=8, model=1): 225, ProcessCoord(pipe=3, data=8, model=2): 226, ProcessCoord(pipe=3, data=8, model=3): 227, ProcessCoord(pipe=3, data=9, model=0): 228, ProcessCoord(pipe=3, data=9, model=1): 229, ProcessCoord(pipe=3, data=9, model=2): 230, ProcessCoord(pipe=3, data=9, model=3): 231, ProcessCoord(pipe=3, data=10, model=0): 232, ProcessC + 0: oord(pipe=3, data=10, model=1): 233, ProcessCoord(pipe=3, data=10, model=2): 234, ProcessCoord(pipe=3, data=10, model=3): 235, ProcessCoord(pipe=3, data=11, model=0): 236, ProcessCoord(pipe=3, data=11, model=1): 237, ProcessCoord(pipe=3, data=11, model=2): 238, ProcessCoord(pipe=3, data=11, model=3): 239, ProcessCoord(pipe=3, data=12, model=0): 240, ProcessCoord(pipe=3, data=12, model=1): 241, ProcessCoord(pipe=3, data=12, model=2): 242, ProcessCoord(pipe=3, data=12, model=3): 243, ProcessCoord(pipe=3, data=13, model=0): 244, ProcessCoord(pipe=3, data=13, model=1): 245, ProcessCoord(pipe=3, data=13, model=2): 246, ProcessCoord(pipe=3, data=13, model=3): 247, ProcessCoord(pipe=3, data=14, model=0): 248, ProcessCoord(pipe=3, data=14, model=1): 249, ProcessCoord(pipe=3, data=14, model=2): 250, ProcessCoord(pipe=3, data=14, model=3): 251, ProcessCoord(pipe=3, data=15, model=0): 252, ProcessCoord(pipe=3, data=15, model=1): 253, ProcessCoord(pipe=3, data=15, model=2): 254, ProcessCoord(pipe=3, data=15, model=3): 25 + 0: 5} + 0: [2023-05-25 13:37:53,844] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer + 0: stage=0 layers=14 + 0: 0: _to_float16 + 0: 1: EmbeddingPipe + 0: 2: + 0: 3: ParallelTransformerLayerPipe + 0: 4: ParallelTransformerLayerPipe + 0: 5: ParallelTransformerLayerPipe + 0: 6: ParallelTransformerLayerPipe + 0: 7: ParallelTransformerLayerPipe + 0: 8: ParallelTransformerLayerPipe + 0: 9: ParallelTransformerLayerPipe + 0: 10: ParallelTransformerLayerPipe + 0: 11: ParallelTransformerLayerPipe + 0: 12: ParallelTransformerLayerPipe + 0: 13: ParallelTransformerLayerPipe + 0: stage=1 layers=11 + 0: 14: ParallelTransformerLayerPipe + 0: 15: ParallelTransformerLayerPipe + 0: 16: ParallelTransformerLayerPipe + 0: 17: ParallelTransformerLayerPipe + 0: 18: ParallelTransformerLayerPipe + 0: 19: ParallelTransformerLayerPipe + 0: 20: ParallelTransformerLayerPipe + 0: 21: ParallelTransformerLayerPipe + 0: 22: ParallelTransformerLayerPipe + 0: 23: ParallelTransformerLayerPipe + 0: 24: ParallelTransformerLayerPipe + 0: stage=2 layers=11 + 0: 25: ParallelTransformerLayerPipe + 0: 26: ParallelTransformerLayerPipe + 0: 27: ParallelTransformerLayerPipe + 0: 28: ParallelTransformerLayerPipe + 0: 29: ParallelTransformerLayerPipe + 0: 30: ParallelTransformerLayerPipe + 0: 31: ParallelTransformerLayerPipe + 0: 32: ParallelTransformerLayerPipe + 0: 33: ParallelTransformerLayerPipe + 0: 34: ParallelTransformerLayerPipe + 0: 35: ParallelTransformerLayerPipe + 0: stage=3 layers=13 + 0: 36: ParallelTransformerLayerPipe + 0: 37: ParallelTransformerLayerPipe + 0: 38: ParallelTransformerLayerPipe + 0: 39: ParallelTransformerLayerPipe + 0: 40: ParallelTransformerLayerPipe + 0: 41: ParallelTransformerLayerPipe + 0: 42: ParallelTransformerLayerPipe + 0: 43: ParallelTransformerLayerPipe + 0: 44: ParallelTransformerLayerPipe + 0: 45: undo + 0: 46: MixedFusedLayerNorm + 0: 47: EmbeddingPipe + 0: 48: float16_to_fp32 + 0: loss: CrossEntropy + 0: [2023-05-25 13:37:55,402] [INFO] [utils.py:827:see_memory_usage] After Building Model + 0: [2023-05-25 13:37:55,403] [INFO] [utils.py:828:see_memory_usage] MA 1.16 GB Max_MA 1.16 GB CA 1.19 GB Max_CA 1 GB + 0: [2023-05-25 13:37:55,403] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 41.65 GB, percent = 8.3% + 0: setting training iterations to 0 + 0: > learning rate decay style: cosine + 0: DeepSpeed is enabled. + 0: [2023-05-25 13:37:55,405] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +31: ninja: no work to do. +31: Time to load utils op: 0.35482096672058105 seconds +31: Time to load utils op: 0.3555941581726074 seconds +19: Time to load utils op: 0.6255035400390625 seconds +19: Time to load utils op: 0.7619521617889404 seconds +19: Time to load utils op: 0.640009880065918 seconds +19: Time to load utils op: 0.7617371082305908 seconds +19: Time to load utils op: 0.6402287483215332 secondsTime to load utils op: 0.6235151290893555 seconds +19: +23: Time to load utils op: 0.6442480087280273 seconds +23: Time to load utils op: 0.644397497177124 seconds +23: Time to load utils op: 0.6295356750488281 secondsTime to load utils op: 0.6297821998596191 seconds +23: +21: Time to load utils op: 0.6447434425354004 secondsTime to load utils op: 0.6303584575653076 seconds +21: +21: Time to load utils op: 0.6448769569396973 secondsTime to load utils op: 0.6303319931030273 seconds +21: +18: Time to load utils op: 0.6607129573822021 secondsTime to load utils op: 0.650705099105835 seconds +18: +18: Time to load utils op: 0.6307802200317383 seconds +18: Time to load utils op: 0.630800724029541 seconds + 9: Time to load utils op: 0.7523808479309082 seconds + 9: Time to load utils op: 0.7523963451385498 seconds + 9: Time to load utils op: 0.5048801898956299 seconds + 9: Time to load utils op: 0.5050580501556396 seconds + 9: Time to load utils op: 0.6034388542175293 seconds + 9: Time to load utils op: 0.6033079624176025 seconds + 8: Time to load utils op: 0.6116323471069336 seconds + 8: Time to load utils op: 0.6110455989837646 seconds + 8: Time to load utils op: 0.7630081176757812 secondsTime to load utils op: 0.5121304988861084 seconds + 8: + 8: Time to load utils op: 0.7630181312561035 secondsTime to load utils op: 0.6185076236724854 seconds + 8: + 8: Time to load utils op: 0.5171716213226318 seconds + 8: Time to load utils op: 0.6094648838043213 seconds + 9: Time to load utils op: 0.6036350727081299 seconds + 9: Time to load utils op: 0.6037423610687256 seconds +22: Time to load utils op: 0.637505054473877 seconds +22: Time to load utils op: 0.6547441482543945 seconds +22: Time to load utils op: 0.6399564743041992 secondsTime to load utils op: 0.6537578105926514 seconds +22: +28: Time to load utils op: 0.37691831588745117 seconds +28: Time to load utils op: 0.37659502029418945 seconds +20: Time to load utils op: 0.6473941802978516 seconds +20: Time to load utils op: 0.6630938053131104 seconds +20: Time to load utils op: 0.6473824977874756 seconds +20: Time to load utils op: 0.6630170345306396 seconds +16: Time to load utils op: 0.6548614501953125 seconds +16: Time to load utils op: 0.6494412422180176 secondsTime to load utils op: 0.5049982070922852 seconds +16: +20: Time to load utils op: 0.5028359889984131 seconds +20: Time to load utils op: 0.5033493041992188 seconds +23: Time to load utils op: 0.5036108493804932 secondsTime to load utils op: 0.5036704540252686 seconds +23: +16: Time to load utils op: 0.5024404525756836 seconds +10: Time to load utils op: 0.5244503021240234 secondsTime to load utils op: 0.7615101337432861 seconds +10: +14: Time to load utils op: 0.6249353885650635 seconds +14: Time to load utils op: 0.7571377754211426 seconds +14: Time to load utils op: 0.6251101493835449 seconds +10: Time to load utils op: 0.7614555358886719 seconds +14: Time to load utils op: 0.524993896484375 secondsTime to load utils op: 0.6103222370147705 seconds +14: Time to load utils op: 0.523761510848999 seconds +10: Time to load utils op: 0.6244323253631592 secondsTime to load utils op: 0.5242915153503418 seconds +10: +10: Time to load utils op: 0.6243798732757568 secondsTime to load utils op: 0.61395263671875 seconds +10: +14: +10: Time to load utils op: 0.6139044761657715 seconds +14: Time to load utils op: 0.6101114749908447 seconds +14: Time to load utils op: 0.7553789615631104 seconds +18: Time to load utils op: 0.503535270690918 seconds +18: Time to load utils op: 0.5032339096069336 seconds +22: Time to load utils op: 0.5034031867980957 seconds +22: Time to load utils op: 0.5035736560821533 seconds +27: Time to load utils op: 0.375333309173584 seconds +27: Time to load utils op: 0.3753824234008789 seconds + 3: Time to load utils op: 0.39586830139160156 secondsTime to load utils op: 0.4019327163696289 seconds + 3: +19: Time to load utils op: 0.503997802734375 seconds + 4: Time to load utils op: 0.43061161041259766 seconds + 4: Time to load utils op: 0.42981815338134766 seconds +26: Time to load utils op: 0.3466982841491699 seconds +26: Time to load utils op: 0.3487203121185303 seconds +17: Time to load utils op: 0.6555821895599365 secondsTime to load utils op: 0.5060997009277344 seconds +17: +17: Time to load utils op: 0.6556594371795654 seconds +17: Time to load utils op: 0.5062394142150879 seconds +25: Time to load utils op: 0.34867024421691895 seconds +12: Time to load utils op: 0.6252782344818115 seconds +12: Time to load utils op: 0.756960391998291 seconds +25: Time to load utils op: 0.3486919403076172 seconds +12: Time to load utils op: 0.524432897567749 seconds +12: Time to load utils op: 0.5242776870727539 seconds +12: Time to load utils op: 0.6135294437408447 seconds +12: Time to load utils op: 0.6247193813323975 secondsTime to load utils op: 0.7569403648376465 seconds +12: +12: Time to load utils op: 0.6135334968566895 seconds +19: Time to load utils op: 0.5035393238067627 seconds +13: Time to load utils op: 0.7623779773712158 seconds +13: Time to load utils op: 0.629141092300415 seconds +13: Time to load utils op: 0.6168467998504639 secondsTime to load utils op: 0.7623577117919922 seconds +13: +13: Time to load utils op: 0.6291086673736572 secondsTime to load utils op: 0.5278606414794922 secondsTime to load utils op: 0.6169326305389404 seconds +13: +13: +13: Time to load utils op: 0.5280539989471436 seconds +15: Time to load utils op: 0.5263903141021729 secondsTime to load utils op: 0.7589719295501709 secondsTime to load utils op: 0.6280527114868164 seconds +15: +15: +15: Time to load utils op: 0.5273325443267822 seconds +15: Time to load utils op: 0.7568120956420898 seconds +15: Time to load utils op: 0.6129376888275146 seconds +15: Time to load utils op: 0.6265995502471924 secondsTime to load utils op: 0.6131298542022705 seconds +15: +21: Time to load utils op: 0.5034613609313965 seconds +21: Time to load utils op: 0.5035789012908936 seconds + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer + 0: [2023-05-25 13:37:56,752] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +11: Time to load utils op: 0.649294376373291 seconds +11: Time to load utils op: 0.6384930610656738 seconds +11: Time to load utils op: 0.7818303108215332 secondsTime to load utils op: 0.5526106357574463 secondsTime to load utils op: 0.7818324565887451 seconds +11: +11: + 0: [2023-05-25 13:37:56,755] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +11: Time to load utils op: 0.6502220630645752 seconds +11: Time to load utils op: 0.5489628314971924 seconds + 0: [2023-05-25 13:37:56,755] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +11: Time to load utils op: 0.6385819911956787 seconds + 7: Time to load utils op: 0.40544700622558594 seconds + 7: Time to load utils op: 0.40546727180480957 seconds + 1: Time to load utils op: 0.42217397689819336 secondsTime to load utils op: 0.42217302322387695 seconds + 1: +29: Time to load utils op: 0.4243769645690918 seconds +29: Time to load utils op: 0.42180967330932617 seconds +24: Time to load utils op: 0.3813297748565674 seconds +24: Time to load utils op: 0.3812861442565918 seconds +30: Time to load utils op: 0.3537108898162842 seconds +30: Time to load utils op: 0.3539261817932129 seconds + 5: Time to load utils op: 0.47379064559936523 secondsTime to load utils op: 0.4736306667327881 seconds + 5: + 2: Time to load utils op: 0.41500210762023926 secondsTime to load utils op: 0.41503238677978516 seconds + 2: + 0: Time to load utils op: 0.5198867321014404 seconds + 0: Time to load utils op: 0.5251860618591309 seconds + 0: [2023-05-25 13:37:56,878] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer + 0: [2023-05-25 13:37:56,879] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.18 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:56,879] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.22 GB, percent = 8.4% +13: Time to load utils op: 0.0004911422729492188 seconds +13: Time to load utils op: 0.0005314350128173828 seconds +13: Time to load utils op: 0.0005457401275634766 seconds +13: Time to load utils op: 0.0003991127014160156 seconds +13: Time to load utils op: 0.000438690185546875 seconds +13: Time to load utils op: 0.0003879070281982422 seconds + 3: Time to load utils op: 0.0006628036499023438 seconds + 3: Time to load utils op: 0.0007040500640869141 seconds +14: Time to load utils op: 0.0005333423614501953 secondsTime to load utils op: 0.0004971027374267578 seconds +14: +14: Time to load utils op: 0.00039458274841308594 seconds +14: Time to load utils op: 0.0004482269287109375 secondsTime to load utils op: 0.0004658699035644531 seconds +14: +14: Time to load utils op: 0.0004975795745849609 seconds +14: Time to load utils op: 0.0004334449768066406 seconds +14: Time to load utils op: 0.00048804283142089844 seconds + 4: Time to load utils op: 0.0007066726684570312 seconds + 4: Time to load utils op: 0.0009107589721679688 seconds +10: Time to load utils op: 0.0005564689636230469 seconds +10: Time to load utils op: 0.00055694580078125 secondsTime to load utils op: 0.0005605220794677734 seconds +10: +10: Time to load utils op: 0.0006020069122314453 secondsTime to load utils op: 0.000598907470703125 secondsTime to load utils op: 0.0005743503570556641 seconds +10: +10: +10: Time to load utils op: 0.0005400180816650391 seconds +10: Time to load utils op: 0.0006353855133056641 seconds +28: Time to load utils op: 0.0005469322204589844 seconds +28: Time to load utils op: 0.0005781650543212891 seconds +21: Time to load utils op: 0.000453948974609375 seconds +21: Time to load utils op: 0.0004487037658691406 seconds +21: Time to load utils op: 0.0004787445068359375 seconds +21: Time to load utils op: 0.0004916191101074219 seconds +21: Time to load utils op: 0.00048089027404785156 seconds +21: Time to load utils op: 0.0009932518005371094 seconds +31: Time to load utils op: 0.0010154247283935547 seconds +31: Time to load utils op: 0.0008137226104736328 seconds +19: Time to load utils op: 0.0005137920379638672 seconds +19: Time to load utils op: 0.0005669593811035156 seconds +19: Time to load utils op: 0.0005655288696289062 seconds +19: Time to load utils op: 0.0005581378936767578 seconds +19: Time to load utils op: 0.0005652904510498047 seconds +19: Time to load utils op: 0.0005533695220947266 secondsTime to load utils op: 0.0005691051483154297 seconds +19: +19: Time to load utils op: 0.000537872314453125 seconds +24: Time to load utils op: 0.0008769035339355469 seconds +24: Time to load utils op: 0.0008220672607421875 seconds +30: Time to load utils op: 0.0005125999450683594 seconds +30: Time to load utils op: 0.0005364418029785156 seconds + 0: Time to load utils op: 0.0007154941558837891 seconds + 0: Time to load utils op: 0.0008318424224853516 seconds +18: Time to load utils op: 0.0005295276641845703 seconds +18: Time to load utils op: 0.00042819976806640625 seconds +18: Time to load utils op: 0.00043773651123046875 seconds +18: Time to load utils op: 0.0004324913024902344 seconds +18: Time to load utils op: 0.0003883838653564453 seconds +18: Time to load utils op: 0.0004742145538330078 seconds +17: Time to load utils op: 0.00046706199645996094 seconds +17: Time to load utils op: 0.00047469139099121094 seconds +17: Time to load utils op: 0.00048232078552246094 seconds +17: Time to load utils op: 0.0005714893341064453 seconds +16: Time to load utils op: 0.0004658699035644531 secondsTime to load utils op: 0.00046181678771972656 seconds +16: +16: Time to load utils op: 0.0004951953887939453 seconds +16: Time to load utils op: 0.0005321502685546875 seconds +15: Time to load utils op: 0.0005424022674560547 seconds +15: Time to load utils op: 0.000530242919921875 seconds +15: Time to load utils op: 0.0004715919494628906 seconds +15: Time to load utils op: 0.0005218982696533203 seconds +15: Time to load utils op: 0.0005478858947753906 secondsTime to load utils op: 0.0005428791046142578 seconds +15: +15: Time to load utils op: 0.0005142688751220703 seconds +15: Time to load utils op: 0.0005137920379638672 seconds +26: Time to load utils op: 0.0007157325744628906 seconds +26: Time to load utils op: 0.0008149147033691406 seconds + 9: Time to load utils op: 0.0005331039428710938 seconds + 9: Time to load utils op: 0.0005381107330322266 secondsTime to load utils op: 0.0005323886871337891 seconds + 9: + 9: Time to load utils op: 0.0005395412445068359 seconds + 9: Time to load utils op: 0.0005598068237304688 secondsTime to load utils op: 0.0005774497985839844 secondsTime to load utils op: 0.0005488395690917969 seconds + 9: + 9: + 9: Time to load utils op: 0.0004863739013671875 seconds +25: Time to load utils op: 0.0007739067077636719 seconds +25: Time to load utils op: 0.0006783008575439453 seconds +25: ninja: no work to do. + 5: Time to load utils op: 0.0008363723754882812 seconds + 5: Time to load utils op: 0.0007798671722412109 seconds +12: Time to load utils op: 0.000560760498046875 seconds +12: Time to load utils op: 0.0005450248718261719 seconds +12: Time to load utils op: 0.0005784034729003906 seconds +12: Time to load utils op: 0.0004837512969970703 secondsTime to load utils op: 0.0005052089691162109 seconds +12: +12: Time to load utils op: 0.0004963874816894531 seconds +12: Time to load utils op: 0.0004868507385253906 seconds +12: Time to load utils op: 0.0005054473876953125 seconds +22: Time to load utils op: 0.0005154609680175781 seconds +22: Time to load utils op: 0.0004169940948486328 seconds +22: Time to load utils op: 0.0004322528839111328 seconds +22: Time to load utils op: 0.00042700767517089844 seconds + 8: Time to load utils op: 0.0004940032958984375 seconds + 8: Time to load utils op: 0.0004024505615234375 seconds + 8: Time to load utils op: 0.0004024505615234375 seconds + 8: Time to load utils op: 0.0005018711090087891 seconds +23: Time to load utils op: 0.0009386539459228516 seconds +22: Time to load utils op: 0.0004756450653076172 seconds + 8: Time to load utils op: 0.0004134178161621094 seconds +23: Time to load utils op: 0.000978231430053711 seconds +22: Time to load utils op: 0.0004837512969970703 seconds +23: Time to load utils op: 0.0009717941284179688 secondsTime to load utils op: 0.000989675521850586 seconds + 8: Time to load utils op: 0.0004093647003173828 seconds +23: +23: Time to load utils op: 0.00096893310546875 secondsTime to load utils op: 0.000993967056274414 seconds +23: + 8: Time to load utils op: 0.000400543212890625 seconds + 1: Time to load utils op: 0.0008938312530517578 seconds +11: Time to load utils op: 0.0006086826324462891 seconds + 8: Time to load utils op: 0.00042319297790527344 seconds +11: Time to load utils op: 0.00047516822814941406 secondsTime to load utils op: 0.0004935264587402344 seconds +11: +11: Time to load utils op: 0.0004901885986328125 seconds + 7: Time to load utils op: 0.0008356571197509766 secondsTime to load utils op: 0.0008122920989990234 seconds + 7: +11: Time to load utils op: 0.0005433559417724609 seconds +11: Time to load utils op: 0.0005519390106201172 secondsTime to load utils op: 0.0005276203155517578 seconds +11: +11: Time to load utils op: 0.0005927085876464844 seconds + 1: Time to load utils op: 0.0007457733154296875 seconds +29: Time to load utils op: 0.0009691715240478516 seconds +29: Time to load utils op: 0.0008306503295898438 seconds +27: Time to load utils op: 0.0010447502136230469 seconds +20: Time to load utils op: 0.003806591033935547 seconds +20: Time to load utils op: 0.0038080215454101562 seconds +27: Time to load utils op: 0.005916118621826172 seconds +20: Time to load utils op: 0.0038957595825195312 seconds +20: Time to load utils op: 0.003911495208740234 seconds +20: Time to load utils op: 0.003934383392333984 seconds +20: Time to load utils op: 0.003922462463378906 seconds +25: Time to load utils op: 0.20262527465820312 seconds +13: Time to load utils op: 0.00041174888610839844 seconds +13: Time to load utils op: 0.00041985511779785156 seconds +25: Time to load utils op: 0.0006263256072998047 seconds + 2: Time to load utils op: 0.0009274482727050781 seconds + 2: Time to load utils op: 0.0010440349578857422 seconds +29: ninja: no work to do. +29: Time to load utils op: 0.17166757583618164 seconds +29: Time to load utils op: 0.0005838871002197266 seconds + 7: ninja: no work to do. + 3: Time to load utils op: 0.3140723705291748 seconds + 2: Time to load utils op: 0.3137197494506836 seconds + 4: Time to load utils op: 0.6191346645355225 secondsTime to load utils op: 0.6192529201507568 seconds + 4: + 5: Time to load utils op: 0.619361400604248 seconds + 0: Time to load utils op: 0.6335406303405762 seconds + 2: Time to load utils op: 0.6100924015045166 secondsTime to load utils op: 0.314011812210083 seconds + 2: + 2: Time to load utils op: 0.6131505966186523 seconds + 1: Time to load utils op: 0.31455373764038086 seconds + 7: Time to load utils op: 0.6125161647796631 seconds + 3: Time to load utils op: 0.3138618469238281 seconds + 5: Time to load utils op: 0.6199865341186523 seconds + 3: Time to load utils op: 0.6209688186645508 seconds + 3: Time to load utils op: 0.6208791732788086 seconds + 0: Time to load utils op: 0.5101697444915771 seconds + 1: Time to load utils op: 0.3148794174194336 seconds + 1: Time to load utils op: 0.6159496307373047 seconds + 7: Time to load utils op: 0.6136651039123535 seconds + 7: Time to load utils op: 0.2957611083984375 seconds + 1: Time to load utils op: 0.6159713268280029 seconds + 4: Time to load utils op: 0.3027513027191162 seconds + 4: Time to load utils op: 0.3033452033996582 seconds + 5: Time to load utils op: 0.3021821975708008 seconds + 5: Time to load utils op: 0.30245161056518555 seconds + 7: Time to load utils op: 0.302898645401001 seconds + 6: Time to load utils op: 0.3058302402496338 seconds + 6: Time to load utils op: 0.30589842796325684 seconds + 6: Time to load utils op: 0.6255815029144287 seconds + 6: Time to load utils op: 0.6257119178771973 seconds + 3: Time to load utils op: 0.0006623268127441406 seconds + 4: Time to load utils op: 0.0004749298095703125 seconds + 3: Time to load utils op: 0.000354766845703125 seconds + 2: Time to load utils op: 0.0004773139953613281 seconds + 2: Time to load utils op: 0.00036072731018066406 seconds + 4: Time to load utils op: 0.00035190582275390625 seconds + 2: Time to load utils op: 0.0004267692565917969 secondsTime to load utils op: 0.00041985511779785156 seconds + 2: + 4: Time to load utils op: 0.00035190582275390625 seconds + 5: Time to load utils op: 0.0004923343658447266 seconds + 5: Time to load utils op: 0.00045561790466308594 seconds + 4: Time to load utils op: 0.00031876564025878906 seconds + 0: Time to load utils op: 0.0005910396575927734 seconds + 3: Time to load utils op: 0.00034618377685546875 seconds + 3: Time to load utils op: 0.00035881996154785156 seconds + 1: Time to load utils op: 0.0004734992980957031 seconds + 1: Time to load utils op: 0.0003402233123779297 seconds + 5: Time to load utils op: 0.0003216266632080078 seconds + 5: Time to load utils op: 0.0003387928009033203 seconds + 7: Time to load utils op: 0.0009579658508300781 seconds + 1: Time to load utils op: 0.0004901885986328125 seconds + 1: Time to load utils op: 0.0004684925079345703 seconds + 7: Time to load utils op: 0.0006663799285888672 seconds + 7: Time to load utils op: 0.0005333423614501953 seconds + 0: Time to load utils op: 0.2028365135192871 seconds + 7: Time to load utils op: 0.0008783340454101562 seconds + 0: Time to load utils op: 0.20167136192321777 seconds + 2: Time to load utils op: 0.20192480087280273 seconds + 2: Time to load utils op: 0.2021617889404297 seconds + 5: Time to load utils op: 0.2022266387939453 secondsTime to load utils op: 0.20219755172729492 seconds + 5: + 6: Time to load utils op: 0.20230889320373535 secondsTime to load utils op: 0.20223712921142578 seconds + 6: + 3: Time to load utils op: 0.20257854461669922 seconds + 4: Time to load utils op: 0.2022080421447754 seconds + 4: Time to load utils op: 0.20213651657104492 seconds + 3: Time to load utils op: 0.20235562324523926 seconds + 1: Time to load utils op: 0.2022075653076172 seconds + 1: Time to load utils op: 0.2019965648651123 seconds + 7: Time to load utils op: 0.20205378532409668 seconds + 7: Time to load utils op: 0.20262551307678223 seconds +24: Time to load utils op: 0.5034389495849609 seconds +24: Time to load utils op: 0.502748966217041 seconds +25: Time to load utils op: 0.5031239986419678 seconds +25: Time to load utils op: 0.5035305023193359 seconds +26: Time to load utils op: 0.5028486251831055 seconds +26: Time to load utils op: 0.5030839443206787 seconds +24: Time to load utils op: 0.40320706367492676 seconds +25: Time to load utils op: 0.7038285732269287 seconds +29: Time to load utils op: 0.504371166229248 seconds + 0: Time to load utils op: 0.00044846534729003906 seconds +28: Time to load utils op: 0.5061213970184326 seconds +27: Time to load utils op: 0.5058856010437012 seconds +16: Time to load utils op: 1.5145437717437744 seconds +17: Time to load utils op: 1.513885259628296 seconds +17: Time to load utils op: 1.513932228088379 seconds +28: Time to load utils op: 0.5066466331481934 seconds +30: Time to load utils op: 0.5041708946228027 seconds +27: Time to load utils op: 0.5060453414916992 seconds +30: Time to load utils op: 0.504035234451294 seconds +16: Time to load utils op: 1.5149848461151123 seconds +18: Time to load utils op: 1.5119423866271973 seconds +28: Time to load utils op: 0.7038934230804443 seconds +26: Time to load utils op: 0.7036864757537842 seconds +26: Time to load utils op: 0.7036492824554443 seconds +28: Time to load utils op: 0.703188419342041 seconds +25: Time to load utils op: 0.4022367000579834 seconds + 0: Time to load utils op: 0.0004093647003173828 seconds +25: Time to load utils op: 0.40237903594970703 seconds +28: Time to load utils op: 0.4023430347442627 seconds +26: Time to load utils op: 0.40228271484375 seconds +29: Time to load utils op: 0.402202844619751 seconds +28: Time to load utils op: 0.40245580673217773 seconds +26: Time to load utils op: 0.402435302734375 seconds +30: Time to load utils op: 0.40219759941101074 seconds +24: Time to load utils op: 0.4026777744293213 seconds +30: Time to load utils op: 0.40262579917907715 seconds +27: Time to load utils op: 0.40294384956359863 seconds +31: Time to load utils op: 0.5039842128753662 seconds +29: Time to load utils op: 0.40281009674072266 seconds +31: Time to load utils op: 0.5040154457092285 seconds +27: Time to load utils op: 0.4026374816894531 seconds + 5: Time to load utils op: 0.0004558563232421875 seconds +27: Time to load utils op: 0.7041220664978027 seconds + 5: Time to load utils op: 0.0003600120544433594 seconds + 2: Time to load utils op: 0.00035834312438964844 seconds +31: Time to load utils op: 0.7040724754333496 seconds + 2: Time to load utils op: 0.0004260540008544922 seconds +31: Time to load utils op: 0.40274882316589355 seconds +27: Time to load utils op: 0.7046599388122559 seconds + 4: Time to load utils op: 0.00037026405334472656 seconds + 4: Time to load utils op: 0.00036787986755371094 seconds + 3: Time to load utils op: 0.00035190582275390625 seconds +31: Time to load utils op: 0.7046389579772949 seconds +31: Time to load utils op: 0.40326738357543945 seconds +18: Time to load utils op: 1.505786657333374 seconds + 3: Time to load utils op: 0.0003795623779296875 seconds +16: Time to load utils op: 1.404435634613037 seconds +24: Time to load utils op: 0.0004868507385253906 seconds +20: Time to load utils op: 1.5071334838867188 seconds +29: Time to load utils op: 0.7089626789093018 secondsTime to load utils op: 0.7092471122741699 seconds +29: +16: Time to load utils op: 1.404235601425171 seconds + 1: Time to load utils op: 0.00036716461181640625 seconds + 1: Time to load utils op: 0.00037169456481933594 seconds + 7: Time to load utils op: 0.0005249977111816406 seconds +20: Time to load utils op: 1.507591724395752 seconds +21: Time to load utils op: 1.50722074508667 seconds +25: Time to load utils op: 0.0004971027374267578 seconds + 6: Time to load utils op: 1.1128008365631104 seconds +23: Time to load utils op: 1.507453203201294 seconds + 6: Time to load utils op: 1.1131837368011475 seconds +23: Time to load utils op: 1.5077097415924072 seconds + 7: Time to load utils op: 0.0005033016204833984 seconds +24: Time to load utils op: 0.0029964447021484375 seconds +17: Time to load utils op: 1.4048008918762207 seconds +25: Time to load utils op: 0.0004019737243652344 seconds +17: Time to load utils op: 1.4049618244171143 seconds +21: Time to load utils op: 1.5075678825378418 seconds +22: Time to load utils op: 1.5080592632293701 seconds +22: Time to load utils op: 1.508380651473999 seconds +25: Time to load utils op: 0.00039505958557128906 seconds +30: Time to load utils op: 0.7036492824554443 seconds +24: Time to load utils op: 0.00045490264892578125 seconds +30: Time to load utils op: 0.7068638801574707 seconds +24: Time to load utils op: 0.7037913799285889 secondsTime to load utils op: 0.7036895751953125 seconds +24: +25: Time to load utils op: 0.00037598609924316406 seconds +30: Time to load utils op: 0.0005154609680175781 seconds +30: Time to load utils op: 0.0005080699920654297 seconds +17: Time to load utils op: 0.0004990100860595703 seconds +17: Time to load utils op: 0.0003681182861328125 seconds +30: Time to load utils op: 0.00035452842712402344 seconds +30: Time to load utils op: 0.000335693359375 seconds +25: Time to load utils op: 0.0003371238708496094 seconds +26: Time to load utils op: 0.007875442504882812 seconds +26: Time to load utils op: 0.007835149765014648 seconds +16: Time to load utils op: 0.008891105651855469 secondsTime to load utils op: 0.009286165237426758 seconds +16: +24: Time to load utils op: 0.00039505958557128906 seconds +30: Time to load utils op: 0.0003631114959716797 seconds +30: Time to load utils op: 0.0003628730773925781 seconds +22: Time to load utils op: 0.0006701946258544922 seconds +22: Time to load utils op: 0.0006990432739257812 seconds +31: Time to load utils op: 0.0008587837219238281 seconds +31: Time to load utils op: 0.0011551380157470703 seconds +31: Time to load utils op: 0.0007977485656738281 seconds +31: Time to load utils op: 0.0006387233734130859 seconds +31: Time to load utils op: 0.0006992816925048828 seconds +31: Time to load utils op: 0.0005319118499755859 seconds +16: Time to load utils op: 0.000362396240234375 seconds + 0: Time to load utils op: 0.40247488021850586 seconds +16: Time to load utils op: 0.0003695487976074219 seconds + 0: Time to load utils op: 0.4025542736053467 seconds +24: Time to load utils op: 0.000347137451171875 seconds + 6: Time to load utils op: 0.0005364418029785156 seconds + 6: Time to load utils op: 0.0004291534423828125 seconds + 6: Time to load utils op: 0.0004558563232421875 seconds + 6: Time to load utils op: 0.0005381107330322266 seconds + 6: Time to load utils op: 0.0005307197570800781 seconds + 6: Time to load utils op: 0.0005469322204589844 seconds + 6: Time to load utils op: 0.0005400180816650391 seconds + 6: Time to load utils op: 0.0006139278411865234 seconds +24: Time to load utils op: 0.0003402233123779297 seconds +28: Time to load utils op: 0.0059223175048828125 seconds +28: Time to load utils op: 0.005858182907104492 seconds +28: Time to load utils op: 0.005786418914794922 seconds +28: Time to load utils op: 0.005945682525634766 seconds +28: Time to load utils op: 0.00642085075378418 seconds +28: Time to load utils op: 0.006276130676269531 seconds + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:828:see_memory_usage] MA 1.15 GB Max_MA 1.15 GB CA 1.21 GB Max_CA 1 GB + 0: [2023-05-25 13:37:57,516] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.27 GB, percent = 8.4% + 0: Time to load utils op: 0.0003707408905029297 seconds +29: Time to load utils op: 0.005079746246337891 seconds +29: Time to load utils op: 0.004809379577636719 seconds + 0: Time to load utils op: 0.0004894733428955078 seconds +29: Time to load utils op: 0.005235195159912109 seconds +29: Time to load utils op: 0.00046062469482421875 seconds +27: Time to load utils op: 0.0004982948303222656 seconds +27: Time to load utils op: 0.00038123130798339844 seconds +27: Time to load utils op: 0.00037550926208496094 seconds +27: Time to load utils op: 0.0003502368927001953 seconds +27: Time to load utils op: 0.0003502368927001953 seconds +27: Time to load utils op: 0.0004169940948486328 seconds +18: Time to load utils op: 0.004503011703491211 secondsTime to load utils op: 0.004517555236816406 seconds +18: +20: Time to load utils op: 0.003946065902709961 seconds +17: Time to load utils op: 0.0003981590270996094 seconds +17: Time to load utils op: 0.0003781318664550781 seconds +23: Time to load utils op: 0.004816293716430664 seconds +23: Time to load utils op: 0.004965066909790039 seconds +29: Time to load utils op: 0.0004379749298095703 seconds +20: Time to load utils op: 0.00035881996154785156 seconds +26: Time to load utils op: 0.0003616809844970703 seconds +26: Time to load utils op: 0.0003590583801269531 seconds +26: Time to load utils op: 0.00037407875061035156 seconds +26: Time to load utils op: 0.00048232078552246094 seconds + 0: [2023-05-25 13:37:57,650] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 + 0: [2023-05-25 13:37:57,651] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,651] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.31 GB, percent = 8.4% +21: Time to load utils op: 0.004395008087158203 seconds +21: Time to load utils op: 0.003838777542114258 seconds + 0: [2023-05-25 13:37:57,787] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 + 0: [2023-05-25 13:37:57,788] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.14 GB Max_CA 3 GB + 0: [2023-05-25 13:37:57,788] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.34 GB, percent = 8.4% + 0: [2023-05-25 13:37:57,917] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 + 0: [2023-05-25 13:37:57,917] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:57,918] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.46 GB, percent = 8.4% + 0: [2023-05-25 13:37:58,046] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 + 0: [2023-05-25 13:37:58,047] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,047] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.61 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,158] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 + 0: [2023-05-25 13:37:58,159] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,159] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.62 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:828:see_memory_usage] MA 3.58 GB Max_MA 3.58 GB CA 4.76 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,267] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.62 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,380] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.57 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,487] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer + 0: [2023-05-25 13:37:58,488] [INFO] [utils.py:828:see_memory_usage] MA 3.87 GB Max_MA 3.87 GB CA 5.04 GB Max_CA 5 GB + 0: [2023-05-25 13:37:58,488] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 42.57 GB, percent = 8.5% + 0: [2023-05-25 13:37:58,488] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam + 0: [2023-05-25 13:37:58,488] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler + 0: [2023-05-25 13:37:58,489] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = + 0: [2023-05-25 13:37:58,489] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] activation_checkpointing_config { + 0: "partition_activations": false, + 0: "contiguous_memory_optimization": false, + 0: "cpu_checkpointing": false, + 0: "number_checkpoints": null, + 0: "synchronize_checkpoint_boundary": false, + 0: "profile": false + 0: } + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] amp_enabled .................. False + 0: [2023-05-25 13:37:58,489] [INFO] [config.py:1011:print] amp_params ................... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] autotuning_config ............ { + 0: "enabled": false, + 0: "start_step": null, + 0: "end_step": null, + 0: "metric_path": null, + 0: "arg_mappings": null, + 0: "metric": "throughput", + 0: "model_info": null, + 0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", + 0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", + 0: "overwrite": true, + 0: "fast": true, + 0: "start_profile_step": 3, + 0: "end_profile_step": 5, + 0: "tuner_type": "gridsearch", + 0: "tuner_early_stopping": 5, + 0: "tuner_num_trials": 50, + 0: "model_info_path": null, + 0: "mp_size": 1, + 0: "max_train_batch_size": null, + 0: "min_train_batch_size": 1, + 0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, + 0: "min_train_micro_batch_size_per_gpu": 1, + 0: "num_tuning_micro_batch_sizes": 3 + 0: } + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] bfloat16_enabled ............. True + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] comms_config ................. + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] communication_data_type ...... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa + 0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] curriculum_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] curriculum_params ............ False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dataloader_drop_last ......... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] disable_allgather ............ False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dump_state ................... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] elasticity_enabled ........... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] flops_profiler_config ........ { + 0: "enabled": false, + 0: "profile_step": 1, + 0: "module_depth": -1, + 0: "top_modules": 1, + 0: "detailed": true, + 0: "output_file": null + 0: } + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_auto_cast ............... None + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_enabled ................. False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] global_rank .................. 0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 32 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] load_universal_checkpoint .... False + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] loss_scale ................... 1.0 + 0: [2023-05-25 13:37:58,490] [INFO] [config.py:1011:print] memory_breakdown ............. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] monitor_config ............... + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] nebula_config ................ { + 0: "enabled": false, + 0: "persistent_storage_path": null, + 0: "persistent_time_interval": 100, + 0: "num_of_version_in_retention": 2, + 0: "enable_nebula_load": true, + 0: "load_path": null + 0: } + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_name ............... None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] optimizer_params ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pld_enabled .................. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] pld_params ................... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] prescale_gradients ........... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] scheduler_name ............... None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] scheduler_params ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] sparse_attention ............. None + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] steps_per_print .............. 2000 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] train_batch_size ............. 512 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 1 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] use_node_local_storage ....... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] world_size ................... 16 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_enabled ................. False + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 + 0: [2023-05-25 13:37:58,491] [INFO] [config.py:996:print_user_config] json = { + 0: "train_micro_batch_size_per_gpu": 1, + 0: "train_batch_size": 512, + 0: "gradient_clipping": 1.0, + 0: "zero_optimization": { + 0: "stage": 0 + 0: }, + 0: "bf16": { + 0: "enabled": true + 0: }, + 0: "steps_per_print": 2.000000e+03, + 0: "wall_clock_breakdown": false + 0: } + 0: Time to load utils op: 0.0004456043243408203 seconds + 0: [2023-05-25 13:37:58,492] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=32 micro_batch_size=1 + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=3 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=2 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:37:58,956] [INFO] [engine.py:145:__init__] RANK=1 STAGE=0 LAYERS=14 [0, 14) STAGE_PARAMS=614290432 (614.290M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=64 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=67 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=66 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 8: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=65 STAGE=1 LAYERS=11 [14, 25) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=128 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=129 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=130 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +16: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=131 STAGE=2 LAYERS=11 [25, 36) STAGE_PARAMS=553997312 (553.997M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=193 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=194 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=195 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) +24: [2023-05-25 13:37:58,958] [INFO] [engine.py:145:__init__] RANK=192 STAGE=3 LAYERS=13 [36, 49) STAGE_PARAMS=513571840 (513.572M) TOTAL_PARAMS=8943427584 (8943.428M) UNIQUE_PARAMS=8702255104 (8702.255M) + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +16: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 9: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 1: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +28: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 7: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +14: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +18: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +30: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +21: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +31: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +11: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +25: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +19: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +17: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 6: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +10: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +23: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +22: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +15: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +20: [2023-05-25 13:38:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 3: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +16: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +13: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 0: [2023-05-25 13:38:00,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +16: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +14: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 6: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 3: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +28: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... + 1: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +29: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 0: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 0: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 6: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 6: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +29: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +12: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 4: [2023-05-25 13:38:00,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 6: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 6: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 2: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +19: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +14: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +18: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... + 2: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +17: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 1: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 4: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +15: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 2: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +19: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 9: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... + 8: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +23: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +22: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +27: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +25: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +21: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +31: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +20: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +11: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +27: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +25: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +22: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +31: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +23: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt... +20: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt... + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 7: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... + 7: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt... +10: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +30: [2023-05-25 13:38:00,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt... +28: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:38:00,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +16: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +29: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +26: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +24: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +22: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +16: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +26: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +24: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +25: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +29: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +28: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +23: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +28: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +23: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +18: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +17: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +21: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +13: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +19: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +22: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +27: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +13: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +19: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +31: [2023-05-25 13:38:00,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +16: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +24: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +29: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_12_model_states.pt. +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +23: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +22: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +25: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +18: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +10: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +19: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +13: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +19: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +10: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +12: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 8: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. +10: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. +15: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 1: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +12: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 7: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. +15: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +15: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. + 1: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 3: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 0: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 2: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 1: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 4: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 2: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 3: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. +15: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 6: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 0: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +13: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +13: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 9: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +13: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +14: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 8: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 8: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +12: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +10: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +15: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. + 9: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +12: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +24: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +30: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +28: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +27: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +26: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. +24: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +27: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt... +29: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +30: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt... + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt... +28: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_00_model_states.pt. + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_02_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_03_model_states.pt. + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. + 5: [2023-05-25 13:38:00,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_01_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +25: [2023-05-25 13:38:00,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. + 5: [2023-05-25 13:38:00,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +15: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +17: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_13_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +17: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +16: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +21: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +19: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +23: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +30: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +28: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +29: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +18: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +26: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +23: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +25: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +30: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_14_model_states.pt. +19: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +22: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +17: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +30: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +31: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +13: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +16: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +16: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. + 9: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +28: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 9: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +10: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +16: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +28: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +16: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +15: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +22: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +13: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +29: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 8: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +19: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +14: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. + 9: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +28: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +10: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +10: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +28: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +21: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +22: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +16: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +25: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +29: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +19: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +25: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +30: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +17: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +26: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +15: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +18: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +24: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +23: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_15_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +16: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +17: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +12: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +25: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +12: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +17: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +18: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +23: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +31: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +21: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +20: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +11: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_05_model_states.pt. +20: [2023-05-25 13:38:00,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_10_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +20: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_06_model_states.pt. +20: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_08_model_states.pt. +11: [2023-05-25 13:38:00,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +20: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +11: [2023-05-25 13:38:00,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_07_model_states.pt. +20: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_11_model_states.pt. +11: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +11: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_09_model_states.pt. +11: [2023-05-25 13:38:00,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/mp_rank_04_model_states.pt. +20: [2023-05-25 13:38:00,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +20: [2023-05-25 13:38:00,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +11: [2023-05-25 13:38:00,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +14: [2023-05-25 13:38:00,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +14: [2023-05-25 13:38:00,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +14: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +14: [2023-05-25 13:38:00,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +11: [2023-05-25 13:38:00,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +15: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +10: [2023-05-25 13:38:00,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +12: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +12: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +12: [2023-05-25 13:38:00,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 8: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:38:00,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +12: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +15: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +15: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +13: [2023-05-25 13:38:00,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt... +13: [2023-05-25 13:38:00,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt... +13: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... +13: [2023-05-25 13:38:00,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt... + 9: [2023-05-25 13:38:00,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +11: [2023-05-25 13:38:00,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +10: [2023-05-25 13:38:00,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +14: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +16: [2023-05-25 13:38:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +10: [2023-05-25 13:38:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +11: [2023-05-25 13:38:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +15: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +16: [2023-05-25 13:38:00,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 2: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... + 2: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +16: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +12: [2023-05-25 13:38:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +11: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 8: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +16: [2023-05-25 13:38:00,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +13: [2023-05-25 13:38:00,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +13: [2023-05-25 13:38:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. +12: [2023-05-25 13:38:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +10: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +15: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 4: [2023-05-25 13:38:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +25: [2023-05-25 13:38:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +25: [2023-05-25 13:38:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +15: [2023-05-25 13:38:00,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +16: [2023-05-25 13:38:00,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +17: [2023-05-25 13:38:00,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +17: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +17: [2023-05-25 13:38:00,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +16: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +17: [2023-05-25 13:38:00,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +16: [2023-05-25 13:38:00,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +25: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +22: [2023-05-25 13:38:00,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +22: [2023-05-25 13:38:00,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +23: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +22: [2023-05-25 13:38:00,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +22: [2023-05-25 13:38:00,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +22: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +22: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +20: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +23: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +18: [2023-05-25 13:38:00,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +23: [2023-05-25 13:38:00,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +25: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +27: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +20: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +27: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +20: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +25: [2023-05-25 13:38:00,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +28: [2023-05-25 13:38:00,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +17: [2023-05-25 13:38:00,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +17: [2023-05-25 13:38:00,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +28: [2023-05-25 13:38:00,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... + 0: [2023-05-25 13:38:00,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 0: [2023-05-25 13:38:00,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +29: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +30: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +30: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +26: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +29: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +17: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +29: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +31: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +24: [2023-05-25 13:38:00,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +24: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... +31: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt... +31: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +29: [2023-05-25 13:38:00,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt... +26: [2023-05-25 13:38:00,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... +26: [2023-05-25 13:38:00,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 1: [2023-05-25 13:38:00,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 1: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 0: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +23: [2023-05-25 13:38:00,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt... +23: [2023-05-25 13:38:00,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... + 6: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt... +18: [2023-05-25 13:38:00,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +27: [2023-05-25 13:38:00,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +20: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +22: [2023-05-25 13:38:00,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +18: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +27: [2023-05-25 13:38:00,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +23: [2023-05-25 13:38:00,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +28: [2023-05-25 13:38:00,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +27: [2023-05-25 13:38:00,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +22: [2023-05-25 13:38:00,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +22: [2023-05-25 13:38:00,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +24: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +29: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +28: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +26: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +26: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. +31: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 1: [2023-05-25 13:38:00,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +30: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_00-model_states.pt. +21: [2023-05-25 13:38:00,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +31: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +24: [2023-05-25 13:38:00,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:38:00,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt... +19: [2023-05-25 13:38:00,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt... +21: [2023-05-25 13:38:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +21: [2023-05-25 13:38:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. +19: [2023-05-25 13:38:00,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +19: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 2: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 2: [2023-05-25 13:38:00,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +15: [2023-05-25 13:38:00,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +23: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +23: [2023-05-25 13:38:00,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +13: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +14: [2023-05-25 13:38:00,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 6: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +11: [2023-05-25 13:38:00,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +11: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +11: [2023-05-25 13:38:00,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +14: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +12: [2023-05-25 13:38:00,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +11: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 9: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 9: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +18: [2023-05-25 13:38:00,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +11: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +20: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 8: [2023-05-25 13:38:00,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +17: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +10: [2023-05-25 13:38:00,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +18: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 3: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +10: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +18: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +18: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +22: [2023-05-25 13:38:00,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. + 4: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +17: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +26: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +15: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 0: [2023-05-25 13:38:00,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +16: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +23: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +20: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +17: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +13: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +22: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +10: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +10: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +18: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +18: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 9: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 8: [2023-05-25 13:38:00,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 6: [2023-05-25 13:38:00,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 6: [2023-05-25 13:38:00,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:00,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. +15: [2023-05-25 13:38:00,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +16: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +22: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +24: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +24: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +13: [2023-05-25 13:38:00,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +17: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +18: [2023-05-25 13:38:00,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +18: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +14: [2023-05-25 13:38:00,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +16: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +16: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +14: [2023-05-25 13:38:00,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +22: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +13: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 1: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +15: [2023-05-25 13:38:00,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +14: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +13: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +11: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +12: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +12: [2023-05-25 13:38:00,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +12: [2023-05-25 13:38:00,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +15: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +16: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +28: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +26: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +26: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +28: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +16: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +27: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +11: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +24: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +18: [2023-05-25 13:38:00,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +18: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +29: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +24: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 8: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +14: [2023-05-25 13:38:00,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +31: [2023-05-25 13:38:00,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +24: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 2: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +11: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +27: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +30: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. + 4: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +16: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 9: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 9: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +27: [2023-05-25 13:38:00,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +16: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +23: [2023-05-25 13:38:00,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 9: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 9: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +30: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +25: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +25: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_01-model_states.pt. +10: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +10: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 3: [2023-05-25 13:38:00,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +30: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 3: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +25: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +15: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +30: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +25: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +19: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +26: [2023-05-25 13:38:00,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +26: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +15: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 1: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. + 6: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +20: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +20: [2023-05-25 13:38:00,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. + 0: [2023-05-25 13:38:00,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 5: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +28: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. + 1: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. + 6: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +29: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +12: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +24: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +28: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +10: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +10: [2023-05-25 13:38:00,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +23: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +12: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +17: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +20: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_03-model_states.pt. +10: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +20: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +31: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +17: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +28: [2023-05-25 13:38:00,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +21: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +15: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +15: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +26: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +12: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +12: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +30: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +25: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +19: [2023-05-25 13:38:00,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +27: [2023-05-25 13:38:00,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +25: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +26: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +14: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +14: [2023-05-25 13:38:00,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +20: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:00,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +30: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 0: [2023-05-25 13:38:00,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +20: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +29: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. +12: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +29: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_03-model_states.pt. +28: [2023-05-25 13:38:00,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_03-model_states.pt. +10: [2023-05-25 13:38:00,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +17: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 1: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +31: [2023-05-25 13:38:00,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +11: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +19: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +21: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +12: [2023-05-25 13:38:00,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +21: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +20: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_01-model_states.pt. +21: [2023-05-25 13:38:00,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +31: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +13: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +20: [2023-05-25 13:38:00,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_01-model_01-model_states.pt. +21: [2023-05-25 13:38:00,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +11: [2023-05-25 13:38:00,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +14: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:00,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +21: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +21: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_03-model_states.pt. +10: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +24: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +24: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +13: [2023-05-25 13:38:00,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 7: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +13: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +12: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +12: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +15: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +10: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +19: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +13: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +19: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +13: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 7: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 4: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +11: [2023-05-25 13:38:00,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 2: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +21: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +15: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +21: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +11: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 4: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +12: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 5: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +15: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt... +15: [2023-05-25 13:38:00,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +24: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:00,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +22: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. +22: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_25-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +24: [2023-05-25 13:38:00,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +21: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +15: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +13: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +12: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_14-model_02-model_states.pt. +19: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:00,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 5: [2023-05-25 13:38:00,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:00,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:00,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +12: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +12: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +29: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +29: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 5: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +15: [2023-05-25 13:38:00,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +27: [2023-05-25 13:38:00,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:00,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:00,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +25: [2023-05-25 13:38:00,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +15: [2023-05-25 13:38:00,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +15: [2023-05-25 13:38:00,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +15: [2023-05-25 13:38:00,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +10: [2023-05-25 13:38:00,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:00,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +15: [2023-05-25 13:38:00,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +10: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +10: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +29: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +22: [2023-05-25 13:38:00,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +29: [2023-05-25 13:38:00,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +28: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +28: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 8: [2023-05-25 13:38:00,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +22: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +25: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:00,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt... +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +10: [2023-05-25 13:38:00,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. + 8: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +10: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +28: [2023-05-25 13:38:00,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:00,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +13: [2023-05-25 13:38:00,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +31: [2023-05-25 13:38:00,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 2: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 8: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +31: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +13: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... + 4: [2023-05-25 13:38:00,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +12: [2023-05-25 13:38:00,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +31: [2023-05-25 13:38:00,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +13: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +12: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 5: [2023-05-25 13:38:00,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:00,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +30: [2023-05-25 13:38:00,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +31: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_36-model_02-model_states.pt. +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +30: [2023-05-25 13:38:00,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +29: [2023-05-25 13:38:00,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 4: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +29: [2023-05-25 13:38:00,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 6: [2023-05-25 13:38:00,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:00,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +16: [2023-05-25 13:38:01,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +16: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +16: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +28: [2023-05-25 13:38:01,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +27: [2023-05-25 13:38:01,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +28: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +27: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +27: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +29: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. +29: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +28: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +29: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:01,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +17: [2023-05-25 13:38:01,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +28: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +13: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +13: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +24: [2023-05-25 13:38:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +28: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +13: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt... +26: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +26: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +23: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +24: [2023-05-25 13:38:01,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +26: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +26: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +31: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +24: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +23: [2023-05-25 13:38:01,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +26: [2023-05-25 13:38:01,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +26: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +26: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +28: [2023-05-25 13:38:01,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +29: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +25: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt... +30: [2023-05-25 13:38:01,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +16: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +25: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +31: [2023-05-25 13:38:01,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +20: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +30: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +20: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +16: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +20: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +20: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +20: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +18: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +22: [2023-05-25 13:38:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +22: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +29: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +29: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +30: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +27: [2023-05-25 13:38:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +16: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +22: [2023-05-25 13:38:01,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +22: [2023-05-25 13:38:01,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +17: [2023-05-25 13:38:01,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +30: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +17: [2023-05-25 13:38:01,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +31: [2023-05-25 13:38:01,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +31: [2023-05-25 13:38:01,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +16: [2023-05-25 13:38:01,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +23: [2023-05-25 13:38:01,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +23: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... +31: [2023-05-25 13:38:01,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. +26: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +26: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +24: [2023-05-25 13:38:01,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +17: [2023-05-25 13:38:01,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +27: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +25: [2023-05-25 13:38:01,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_00-model_states.pt. +23: [2023-05-25 13:38:01,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +27: [2023-05-25 13:38:01,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +18: [2023-05-25 13:38:01,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +20: [2023-05-25 13:38:01,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +26: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:01,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:01,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +25: [2023-05-25 13:38:01,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +18: [2023-05-25 13:38:01,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +14: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +14: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +19: [2023-05-25 13:38:01,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +19: [2023-05-25 13:38:01,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +19: [2023-05-25 13:38:01,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +14: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt... +21: [2023-05-25 13:38:01,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt... +21: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +21: [2023-05-25 13:38:01,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt... +11: [2023-05-25 13:38:01,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +19: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +19: [2023-05-25 13:38:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +15: [2023-05-25 13:38:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +19: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +21: [2023-05-25 13:38:01,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_00-model_states.pt. +12: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +12: [2023-05-25 13:38:01,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +21: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +13: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +13: [2023-05-25 13:38:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +15: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +12: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +15: [2023-05-25 13:38:01,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +15: [2023-05-25 13:38:01,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +15: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +13: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +11: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +13: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +12: [2023-05-25 13:38:01,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +11: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +14: [2023-05-25 13:38:01,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:01,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +10: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +15: [2023-05-25 13:38:01,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +12: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +12: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +11: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +14: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_03-model_states.pt. +10: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +14: [2023-05-25 13:38:01,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +13: [2023-05-25 13:38:01,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. +13: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +28: [2023-05-25 13:38:01,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +28: [2023-05-25 13:38:01,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +10: [2023-05-25 13:38:01,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +15: [2023-05-25 13:38:01,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +17: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +13: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +28: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +12: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +12: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +13: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +28: [2023-05-25 13:38:01,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +14: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +11: [2023-05-25 13:38:01,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +16: [2023-05-25 13:38:01,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +11: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +13: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +12: [2023-05-25 13:38:01,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +17: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +11: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +12: [2023-05-25 13:38:01,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +11: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +11: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +14: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +16: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +16: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +11: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 8: [2023-05-25 13:38:01,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +11: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +14: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +12: [2023-05-25 13:38:01,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +14: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +13: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +12: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +12: [2023-05-25 13:38:01,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +15: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:01,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +14: [2023-05-25 13:38:01,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +29: [2023-05-25 13:38:01,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +12: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +12: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +29: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +14: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +28: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +15: [2023-05-25 13:38:01,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +15: [2023-05-25 13:38:01,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... +26: [2023-05-25 13:38:01,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +10: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +16: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +16: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +17: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +17: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +23: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +23: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +26: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +10: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_03-model_states.pt. +13: [2023-05-25 13:38:01,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +13: [2023-05-25 13:38:01,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt... +13: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +13: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt... +10: [2023-05-25 13:38:01,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +10: [2023-05-25 13:38:01,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +26: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +31: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +30: [2023-05-25 13:38:01,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +30: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +15: [2023-05-25 13:38:01,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +15: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_15-model_01-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +30: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +18: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +18: [2023-05-25 13:38:01,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +30: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +26: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +26: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +23: [2023-05-25 13:38:01,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +25: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +23: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +30: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +16: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +31: [2023-05-25 13:38:01,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +14: [2023-05-25 13:38:01,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +31: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +10: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +14: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +14: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +22: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +31: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +20: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +20: [2023-05-25 13:38:01,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +23: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +10: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +18: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +23: [2023-05-25 13:38:01,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +26: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +29: [2023-05-25 13:38:01,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +27: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +25: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +15: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +15: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +13: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:01,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +31: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +31: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +28: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +24: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +28: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +26: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:01,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +13: [2023-05-25 13:38:01,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +28: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +28: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +31: [2023-05-25 13:38:01,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +31: [2023-05-25 13:38:01,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_03-model_states.pt. +29: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +20: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +27: [2023-05-25 13:38:01,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +27: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +19: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +19: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +29: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +25: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +25: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +22: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +29: [2023-05-25 13:38:01,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +28: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +28: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +10: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +29: [2023-05-25 13:38:01,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +21: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +27: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +21: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +22: [2023-05-25 13:38:01,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +17: [2023-05-25 13:38:01,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +24: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +27: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +19: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +24: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +24: [2023-05-25 13:38:01,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_01-model_states.pt. +24: [2023-05-25 13:38:01,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +19: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +15: [2023-05-25 13:38:01,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +29: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +25: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +15: [2023-05-25 13:38:01,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +29: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +16: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +28: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +16: [2023-05-25 13:38:01,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +28: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +17: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_02-model_states.pt. +27: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +24: [2023-05-25 13:38:01,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +24: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +21: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +25: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +21: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +26: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +15: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_00-model_states.pt. +30: [2023-05-25 13:38:01,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +19: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +26: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +19: [2023-05-25 13:38:01,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_03-model_02-model_states.pt. +31: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +26: [2023-05-25 13:38:01,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +15: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt... +29: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +29: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_02-model_states.pt. +28: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +24: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +24: [2023-05-25 13:38:01,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +26: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +31: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +31: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt... +28: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +18: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +18: [2023-05-25 13:38:01,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +16: [2023-05-25 13:38:01,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +29: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +16: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +26: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +30: [2023-05-25 13:38:01,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +30: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +18: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +29: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +31: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +29: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +31: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +17: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +30: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +30: [2023-05-25 13:38:01,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +30: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +29: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +30: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +17: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +30: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +30: [2023-05-25 13:38:01,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +28: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +24: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +17: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +22: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +31: [2023-05-25 13:38:01,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... +28: [2023-05-25 13:38:01,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +18: [2023-05-25 13:38:01,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:01,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +20: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +18: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +28: [2023-05-25 13:38:01,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +26: [2023-05-25 13:38:01,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +26: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +22: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +31: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +27: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +28: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +17: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +24: [2023-05-25 13:38:01,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +17: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +24: [2023-05-25 13:38:01,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +17: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +30: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +24: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +19: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +22: [2023-05-25 13:38:01,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +19: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +27: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +25: [2023-05-25 13:38:01,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt... +27: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. +27: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_37-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +27: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +23: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +24: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +18: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +29: [2023-05-25 13:38:01,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +20: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +17: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +21: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +29: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +21: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_03-model_states.pt. +25: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +25: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +18: [2023-05-25 13:38:01,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... +24: [2023-05-25 13:38:01,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +29: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +25: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +25: [2023-05-25 13:38:01,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +24: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +22: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +29: [2023-05-25 13:38:01,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt... +28: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +22: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +24: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +24: [2023-05-25 13:38:01,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +20: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +24: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +22: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +18: [2023-05-25 13:38:01,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +22: [2023-05-25 13:38:01,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +26: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +17: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +27: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +20: [2023-05-25 13:38:01,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt... +18: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +18: [2023-05-25 13:38:01,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_00-model_states.pt. +28: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +16: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +19: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +18: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +16: [2023-05-25 13:38:01,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +20: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +16: [2023-05-25 13:38:01,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt... +20: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +20: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +28: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +26: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +26: [2023-05-25 13:38:01,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +21: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +25: [2023-05-25 13:38:01,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +22: [2023-05-25 13:38:01,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +11: [2023-05-25 13:38:01,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:01,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +11: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +11: [2023-05-25 13:38:01,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +23: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +21: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +25: [2023-05-25 13:38:01,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +22: [2023-05-25 13:38:01,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +23: [2023-05-25 13:38:01,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +22: [2023-05-25 13:38:01,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +19: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +19: [2023-05-25 13:38:01,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_26-model_01-model_states.pt. +14: [2023-05-25 13:38:01,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +22: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +16: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +25: [2023-05-25 13:38:01,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +14: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +22: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +20: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +27: [2023-05-25 13:38:01,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_00-model_states.pt. +20: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +27: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +21: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +13: [2023-05-25 13:38:01,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +21: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt... +27: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt... +20: [2023-05-25 13:38:01,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +13: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +13: [2023-05-25 13:38:01,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +19: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +21: [2023-05-25 13:38:01,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt... +16: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +12: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt... +12: [2023-05-25 13:38:01,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +21: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +19: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +19: [2023-05-25 13:38:01,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. +21: [2023-05-25 13:38:01,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +19: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +19: [2023-05-25 13:38:01,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +13: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +12: [2023-05-25 13:38:01,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt... +21: [2023-05-25 13:38:01,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +11: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +11: [2023-05-25 13:38:01,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:01,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +12: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +12: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +14: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +14: [2023-05-25 13:38:01,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +10: [2023-05-25 13:38:01,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +15: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +13: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +12: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +14: [2023-05-25 13:38:01,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +14: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +28: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +28: [2023-05-25 13:38:01,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +13: [2023-05-25 13:38:01,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +11: [2023-05-25 13:38:01,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +15: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +26: [2023-05-25 13:38:01,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +12: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +12: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +12: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +11: [2023-05-25 13:38:01,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +11: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +10: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_03-model_states.pt. +28: [2023-05-25 13:38:01,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +10: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +26: [2023-05-25 13:38:01,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +13: [2023-05-25 13:38:01,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +14: [2023-05-25 13:38:01,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +13: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +27: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +14: [2023-05-25 13:38:01,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +14: [2023-05-25 13:38:01,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +10: [2023-05-25 13:38:01,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +10: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +10: [2023-05-25 13:38:01,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +24: [2023-05-25 13:38:01,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +10: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +24: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +15: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +31: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +12: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +15: [2023-05-25 13:38:01,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +15: [2023-05-25 13:38:01,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +12: [2023-05-25 13:38:01,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +15: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +27: [2023-05-25 13:38:01,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +27: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +12: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +26: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:01,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +31: [2023-05-25 13:38:01,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +13: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +13: [2023-05-25 13:38:01,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +19: [2023-05-25 13:38:01,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +24: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:01,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +19: [2023-05-25 13:38:01,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +31: [2023-05-25 13:38:01,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. +15: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +10: [2023-05-25 13:38:01,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +10: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +10: [2023-05-25 13:38:01,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt... +29: [2023-05-25 13:38:01,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +29: [2023-05-25 13:38:01,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +25: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +23: [2023-05-25 13:38:01,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +29: [2023-05-25 13:38:01,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +19: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +17: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +17: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +30: [2023-05-25 13:38:01,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +31: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +31: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +22: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +22: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +30: [2023-05-25 13:38:01,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +30: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +15: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +24: [2023-05-25 13:38:01,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +24: [2023-05-25 13:38:01,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +29: [2023-05-25 13:38:01,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_02-model_states.pt. +18: [2023-05-25 13:38:01,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +23: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +17: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +25: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +25: [2023-05-25 13:38:01,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +22: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +21: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. +18: [2023-05-25 13:38:01,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +18: [2023-05-25 13:38:01,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +13: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +13: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +14: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +12: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +11: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +15: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +16: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +11: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +12: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +15: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +20: [2023-05-25 13:38:01,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +20: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +10: [2023-05-25 13:38:01,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_16-model_02-model_states.pt. +25: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +25: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +15: [2023-05-25 13:38:01,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +21: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +30: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +30: [2023-05-25 13:38:01,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +15: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +21: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 4: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +16: [2023-05-25 13:38:01,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +15: [2023-05-25 13:38:01,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt... +11: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +11: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +16: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +14: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +14: [2023-05-25 13:38:01,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +12: [2023-05-25 13:38:01,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +10: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +13: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +15: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +20: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. +20: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... +27: [2023-05-25 13:38:01,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +27: [2023-05-25 13:38:01,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_03-model_states.pt. +30: [2023-05-25 13:38:01,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +20: [2023-05-25 13:38:01,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:01,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 6: [2023-05-25 13:38:01,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +27: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +28: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +28: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +14: [2023-05-25 13:38:01,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +11: [2023-05-25 13:38:01,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +23: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +20: [2023-05-25 13:38:01,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:01,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:01,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +14: [2023-05-25 13:38:01,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +20: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +12: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +11: [2023-05-25 13:38:01,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +28: [2023-05-25 13:38:01,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +14: [2023-05-25 13:38:01,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +12: [2023-05-25 13:38:01,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +12: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +28: [2023-05-25 13:38:01,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +12: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +10: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +13: [2023-05-25 13:38:01,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +15: [2023-05-25 13:38:01,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +15: [2023-05-25 13:38:01,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +13: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +18: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +15: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +18: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +23: [2023-05-25 13:38:01,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +23: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +10: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +26: [2023-05-25 13:38:01,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +26: [2023-05-25 13:38:01,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +10: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +19: [2023-05-25 13:38:01,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_00-model_states.pt. +17: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... + 9: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt... +18: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +28: [2023-05-25 13:38:01,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +18: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +30: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +16: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +16: [2023-05-25 13:38:01,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +29: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +29: [2023-05-25 13:38:01,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +30: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +22: [2023-05-25 13:38:01,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +22: [2023-05-25 13:38:01,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +23: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 1: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +17: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +23: [2023-05-25 13:38:01,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 1: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 7: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +17: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +23: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +29: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +29: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +16: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt... +23: [2023-05-25 13:38:01,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +23: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +26: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +28: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 6: [2023-05-25 13:38:01,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 5: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +21: [2023-05-25 13:38:01,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +22: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_02-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +19: [2023-05-25 13:38:01,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +19: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 0: [2023-05-25 13:38:01,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 3: [2023-05-25 13:38:01,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +23: [2023-05-25 13:38:01,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +19: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +26: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +26: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +22: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +22: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_03-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +20: [2023-05-25 13:38:01,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +16: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +20: [2023-05-25 13:38:01,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +17: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +17: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +19: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +17: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... + 3: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +18: [2023-05-25 13:38:01,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +18: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt... +21: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +17: [2023-05-25 13:38:01,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +18: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. + 2: [2023-05-25 13:38:01,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +22: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +17: [2023-05-25 13:38:01,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +16: [2023-05-25 13:38:01,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +24: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +30: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +17: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +16: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +30: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +29: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +23: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +21: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +21: [2023-05-25 13:38:01,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +20: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 0: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +26: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +26: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_04-model_02-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +23: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +31: [2023-05-25 13:38:01,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +22: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 5: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +20: [2023-05-25 13:38:01,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +23: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +28: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +18: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +28: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +23: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +25: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +31: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +31: [2023-05-25 13:38:01,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +31: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +19: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +22: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +18: [2023-05-25 13:38:01,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +27: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +27: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +24: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +31: [2023-05-25 13:38:01,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +16: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +16: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +27: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +27: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +27: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +27: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... +25: [2023-05-25 13:38:01,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 4: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt... + 2: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt... +25: [2023-05-25 13:38:01,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +24: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 7: [2023-05-25 13:38:01,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +21: [2023-05-25 13:38:01,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +19: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +26: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +21: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +26: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +20: [2023-05-25 13:38:01,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +21: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... + 3: [2023-05-25 13:38:01,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +19: [2023-05-25 13:38:01,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt... +31: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +22: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +16: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +16: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +20: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +16: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +21: [2023-05-25 13:38:01,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +28: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +21: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt... +28: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +17: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +17: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +25: [2023-05-25 13:38:01,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +20: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +16: [2023-05-25 13:38:01,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +29: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +29: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +25: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +20: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 0: [2023-05-25 13:38:01,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. + 1: [2023-05-25 13:38:01,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +27: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:01,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +22: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... + 1: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +20: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +27: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +31: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +18: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +20: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +17: [2023-05-25 13:38:01,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +24: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +24: [2023-05-25 13:38:01,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... + 4: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt... +12: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +24: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +11: [2023-05-25 13:38:01,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +29: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +21: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 4: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +11: [2023-05-25 13:38:01,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +24: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +18: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +24: [2023-05-25 13:38:01,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +26: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +26: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. + 4: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +17: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:01,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +21: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +18: [2023-05-25 13:38:01,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +21: [2023-05-25 13:38:01,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +26: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +26: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +14: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +31: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +14: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +27: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +22: [2023-05-25 13:38:01,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... + 0: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_00-model_states.pt. +27: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +13: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +16: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +31: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +13: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +24: [2023-05-25 13:38:01,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +15: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt... +16: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +16: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +13: [2023-05-25 13:38:01,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +31: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +16: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +15: [2023-05-25 13:38:01,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +28: [2023-05-25 13:38:01,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +25: [2023-05-25 13:38:01,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +28: [2023-05-25 13:38:01,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +19: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +25: [2023-05-25 13:38:01,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +19: [2023-05-25 13:38:01,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_27-model_01-model_states.pt. +25: [2023-05-25 13:38:01,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:01,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +27: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_38-model_01-model_states.pt. +25: [2023-05-25 13:38:01,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +17: [2023-05-25 13:38:01,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +25: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +17: [2023-05-25 13:38:01,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +11: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +17: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +11: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +19: [2023-05-25 13:38:01,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... +10: [2023-05-25 13:38:01,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +27: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +13: [2023-05-25 13:38:01,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +11: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +11: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +27: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt... +14: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +19: [2023-05-25 13:38:01,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +12: [2023-05-25 13:38:01,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +12: [2023-05-25 13:38:01,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_00-model_states.pt. +10: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +19: [2023-05-25 13:38:01,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt... +15: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +15: [2023-05-25 13:38:01,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +27: [2023-05-25 13:38:01,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_00-model_states.pt. +13: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:01,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt... +11: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +14: [2023-05-25 13:38:01,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +11: [2023-05-25 13:38:01,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +13: [2023-05-25 13:38:01,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:01,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +13: [2023-05-25 13:38:01,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +15: [2023-05-25 13:38:01,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +15: [2023-05-25 13:38:01,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +13: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +13: [2023-05-25 13:38:01,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +10: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +12: [2023-05-25 13:38:01,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +14: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +14: [2023-05-25 13:38:01,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_01-model_states.pt. +15: [2023-05-25 13:38:01,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 8: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 4: [2023-05-25 13:38:01,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +14: [2023-05-25 13:38:01,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +11: [2023-05-25 13:38:01,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +15: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +15: [2023-05-25 13:38:01,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +10: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +10: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +14: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +15: [2023-05-25 13:38:01,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +14: [2023-05-25 13:38:01,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +12: [2023-05-25 13:38:01,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 4: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +14: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +11: [2023-05-25 13:38:01,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. +10: [2023-05-25 13:38:01,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +15: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +14: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +14: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 3: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 3: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +12: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +12: [2023-05-25 13:38:01,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 2: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +10: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +15: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +12: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +14: [2023-05-25 13:38:01,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 6: [2023-05-25 13:38:01,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +10: [2023-05-25 13:38:01,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +12: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 9: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +23: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +23: [2023-05-25 13:38:01,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +26: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. + 9: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... +26: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +28: [2023-05-25 13:38:01,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +25: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +29: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +25: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +29: [2023-05-25 13:38:01,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +22: [2023-05-25 13:38:01,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +22: [2023-05-25 13:38:01,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 5: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +11: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +11: [2023-05-25 13:38:01,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +26: [2023-05-25 13:38:01,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +26: [2023-05-25 13:38:01,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +11: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +11: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 2: [2023-05-25 13:38:01,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt... + 2: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 5: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 8: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 7: [2023-05-25 13:38:01,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +29: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +26: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +26: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +30: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +10: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +25: [2023-05-25 13:38:01,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +15: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +25: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 8: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 8: [2023-05-25 13:38:01,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 5: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +22: [2023-05-25 13:38:01,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:01,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:01,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +15: [2023-05-25 13:38:01,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 7: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 9: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +26: [2023-05-25 13:38:01,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 5: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 9: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +10: [2023-05-25 13:38:01,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 0: [2023-05-25 13:38:01,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +15: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +15: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:01,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +12: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +24: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +15: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:01,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. + 7: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +22: [2023-05-25 13:38:01,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +29: [2023-05-25 13:38:01,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 3: [2023-05-25 13:38:01,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +12: [2023-05-25 13:38:01,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +17: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +12: [2023-05-25 13:38:01,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... +17: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +27: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +27: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +20: [2023-05-25 13:38:01,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +20: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 3: [2023-05-25 13:38:01,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +10: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt... + 1: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. + 7: [2023-05-25 13:38:01,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +13: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 9: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +21: [2023-05-25 13:38:01,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +14: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +23: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 1: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +23: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +21: [2023-05-25 13:38:01,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 6: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 0: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +11: [2023-05-25 13:38:01,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +11: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 6: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 6: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +24: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +18: [2023-05-25 13:38:01,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 9: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:01,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +29: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +24: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 6: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +22: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:01,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:01,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +19: [2023-05-25 13:38:01,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +10: [2023-05-25 13:38:01,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +19: [2023-05-25 13:38:01,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +27: [2023-05-25 13:38:01,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +17: [2023-05-25 13:38:01,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +10: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +17: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +31: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +30: [2023-05-25 13:38:01,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +30: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_03-model_states.pt. +12: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +12: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 1: [2023-05-25 13:38:01,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +10: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 8: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. + 8: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_17-model_02-model_states.pt. +27: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +31: [2023-05-25 13:38:01,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +14: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +23: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:01,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +13: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +21: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:01,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +13: [2023-05-25 13:38:01,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +11: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +11: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +17: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +28: [2023-05-25 13:38:01,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +28: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 2: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 2: [2023-05-25 13:38:01,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +18: [2023-05-25 13:38:02,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +18: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_01-model_states.pt. +18: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +16: [2023-05-25 13:38:02,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +19: [2023-05-25 13:38:02,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:02,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +19: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +30: [2023-05-25 13:38:02,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +31: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +12: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +22: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +12: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt... +26: [2023-05-25 13:38:02,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +26: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +23: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +23: [2023-05-25 13:38:02,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +16: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +24: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +31: [2023-05-25 13:38:02,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +16: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +18: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +18: [2023-05-25 13:38:02,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +17: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +16: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +28: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +17: [2023-05-25 13:38:02,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +28: [2023-05-25 13:38:02,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:02,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +21: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_02-model_states.pt. +16: [2023-05-25 13:38:02,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +19: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +18: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +20: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +23: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +26: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +22: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +14: [2023-05-25 13:38:02,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +21: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +21: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +24: [2023-05-25 13:38:02,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +16: [2023-05-25 13:38:02,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +14: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +14: [2023-05-25 13:38:02,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +15: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +15: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +27: [2023-05-25 13:38:02,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +16: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +27: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +13: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +13: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +14: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +16: [2023-05-25 13:38:02,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +16: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +28: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +21: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +13: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +18: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +13: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +11: [2023-05-25 13:38:02,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +29: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +28: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +30: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +27: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +28: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +21: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +30: [2023-05-25 13:38:02,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +29: [2023-05-25 13:38:02,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +29: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +19: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. +19: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +27: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +27: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +28: [2023-05-25 13:38:02,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +21: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +21: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +25: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. +30: [2023-05-25 13:38:02,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:02,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +25: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +12: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +30: [2023-05-25 13:38:02,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +20: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +30: [2023-05-25 13:38:02,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +12: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt... +24: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +20: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +26: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +20: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +24: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +24: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +19: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +28: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +28: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +31: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +28: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +28: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +31: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... +31: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +31: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +28: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +25: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:02,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt... +14: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +14: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +26: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +26: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +29: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +26: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +26: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +27: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +20: [2023-05-25 13:38:02,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +15: [2023-05-25 13:38:02,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +27: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +27: [2023-05-25 13:38:02,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_01-model_states.pt. +13: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +28: [2023-05-25 13:38:02,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +27: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +12: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +28: [2023-05-25 13:38:02,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +13: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +30: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +25: [2023-05-25 13:38:02,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +30: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +12: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +30: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +29: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +17: [2023-05-25 13:38:02,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +29: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +17: [2023-05-25 13:38:02,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_28-model_03-model_states.pt. +15: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +15: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +28: [2023-05-25 13:38:02,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +24: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +26: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +27: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +28: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +26: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +28: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +25: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +30: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +29: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +25: [2023-05-25 13:38:02,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +30: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +29: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_02-model_states.pt. +24: [2023-05-25 13:38:02,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +17: [2023-05-25 13:38:02,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +24: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +26: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +26: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +14: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +10: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +17: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +10: [2023-05-25 13:38:02,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +28: [2023-05-25 13:38:02,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +25: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +12: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_05-model_03-model_states.pt. +25: [2023-05-25 13:38:02,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +11: [2023-05-25 13:38:02,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +14: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +29: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +14: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +14: [2023-05-25 13:38:02,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +29: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +29: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +31: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +27: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +27: [2023-05-25 13:38:02,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +27: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +11: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:02,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +11: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +27: [2023-05-25 13:38:02,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... +27: [2023-05-25 13:38:02,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +27: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt... +24: [2023-05-25 13:38:02,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +15: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +13: [2023-05-25 13:38:02,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +25: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_39-model_02-model_states.pt. +13: [2023-05-25 13:38:02,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +14: [2023-05-25 13:38:02,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +24: [2023-05-25 13:38:02,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +31: [2023-05-25 13:38:02,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +24: [2023-05-25 13:38:02,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +24: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt... +15: [2023-05-25 13:38:02,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +25: [2023-05-25 13:38:02,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +25: [2023-05-25 13:38:02,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt... +10: [2023-05-25 13:38:02,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +14: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +14: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +19: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_00-model_states.pt. +22: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +10: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt... +23: [2023-05-25 13:38:02,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +14: [2023-05-25 13:38:02,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +31: [2023-05-25 13:38:02,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +31: [2023-05-25 13:38:02,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +31: [2023-05-25 13:38:02,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +15: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +12: [2023-05-25 13:38:02,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +31: [2023-05-25 13:38:02,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +15: [2023-05-25 13:38:02,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +15: [2023-05-25 13:38:02,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +10: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +11: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +10: [2023-05-25 13:38:02,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +25: [2023-05-25 13:38:02,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_00-model_states.pt. +25: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +25: [2023-05-25 13:38:02,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt... +19: [2023-05-25 13:38:02,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +13: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_01-model_states.pt. +23: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +23: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +22: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +11: [2023-05-25 13:38:02,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +21: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +16: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +22: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +18: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +16: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +16: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +17: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +16: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +16: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +21: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +17: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +18: [2023-05-25 13:38:02,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +18: [2023-05-25 13:38:02,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +19: [2023-05-25 13:38:02,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +19: [2023-05-25 13:38:02,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +15: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +15: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +10: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +12: [2023-05-25 13:38:02,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +12: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +22: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +14: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +14: [2023-05-25 13:38:02,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +13: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. +13: [2023-05-25 13:38:02,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +12: [2023-05-25 13:38:02,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +14: [2023-05-25 13:38:02,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +15: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +21: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +12: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +11: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +16: [2023-05-25 13:38:02,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +10: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +10: [2023-05-25 13:38:02,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +11: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +17: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +17: [2023-05-25 13:38:02,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +18: [2023-05-25 13:38:02,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +16: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +10: [2023-05-25 13:38:02,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +21: [2023-05-25 13:38:02,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +21: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +13: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +17: [2023-05-25 13:38:02,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +11: [2023-05-25 13:38:02,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +27: [2023-05-25 13:38:02,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +13: [2023-05-25 13:38:02,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +13: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt... +15: [2023-05-25 13:38:02,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +14: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +15: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +14: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +15: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +12: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +12: [2023-05-25 13:38:02,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +12: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +12: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 8: [2023-05-25 13:38:02,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +11: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +27: [2023-05-25 13:38:02,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +20: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. +13: [2023-05-25 13:38:02,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt... +20: [2023-05-25 13:38:02,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt... +20: [2023-05-25 13:38:02,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +13: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +31: [2023-05-25 13:38:02,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:02,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +26: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +26: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +20: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +20: [2023-05-25 13:38:02,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +25: [2023-05-25 13:38:02,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +20: [2023-05-25 13:38:02,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +20: [2023-05-25 13:38:02,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +10: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +10: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +30: [2023-05-25 13:38:02,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +30: [2023-05-25 13:38:02,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +13: [2023-05-25 13:38:02,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +10: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +10: [2023-05-25 13:38:02,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +29: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +29: [2023-05-25 13:38:02,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +11: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +28: [2023-05-25 13:38:02,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_01-model_states.pt. +30: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +31: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +24: [2023-05-25 13:38:02,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +14: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:02,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +24: [2023-05-25 13:38:02,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +26: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +24: [2023-05-25 13:38:02,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +24: [2023-05-25 13:38:02,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:02,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +26: [2023-05-25 13:38:02,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +13: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +29: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +28: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +27: [2023-05-25 13:38:02,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +31: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +14: [2023-05-25 13:38:02,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +29: [2023-05-25 13:38:02,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +24: [2023-05-25 13:38:02,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +28: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +26: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +24: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +28: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +26: [2023-05-25 13:38:02,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_03-model_states.pt. +30: [2023-05-25 13:38:02,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. +10: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +10: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +29: [2023-05-25 13:38:02,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +10: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +10: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +24: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +12: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +15: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +12: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_06-model_02-model_states.pt. +25: [2023-05-25 13:38:02,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_01-model_states.pt. +27: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +27: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +11: [2023-05-25 13:38:02,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +11: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +15: [2023-05-25 13:38:02,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +11: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +27: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +14: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 0: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +30: [2023-05-25 13:38:02,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_18-model_02-model_states.pt. +14: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +14: [2023-05-25 13:38:02,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +13: [2023-05-25 13:38:02,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +24: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +30: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +15: [2023-05-25 13:38:02,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +12: [2023-05-25 13:38:02,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +25: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +13: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +13: [2023-05-25 13:38:02,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +15: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +27: [2023-05-25 13:38:02,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +27: [2023-05-25 13:38:02,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +28: [2023-05-25 13:38:02,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt... +25: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +30: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +30: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +31: [2023-05-25 13:38:02,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_40-model_02-model_states.pt. +25: [2023-05-25 13:38:02,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +27: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:02,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +23: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +27: [2023-05-25 13:38:02,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +25: [2023-05-25 13:38:02,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +29: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +29: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +15: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +28: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +24: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +29: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 6: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +29: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +27: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +31: [2023-05-25 13:38:02,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +26: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +12: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +24: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +31: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +26: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +12: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +24: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +27: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +12: [2023-05-25 13:38:02,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +24: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +26: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... +28: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt... +23: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +12: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +23: [2023-05-25 13:38:02,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +24: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +27: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... +30: [2023-05-25 13:38:02,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +28: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +30: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +25: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +15: [2023-05-25 13:38:02,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +28: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... +19: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:02,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +30: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... +15: [2023-05-25 13:38:02,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +19: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_00-model_states.pt. +30: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt... +27: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 8: [2023-05-25 13:38:02,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt... +11: [2023-05-25 13:38:02,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +11: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt... +30: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +12: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +30: [2023-05-25 13:38:02,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +20: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +12: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +27: [2023-05-25 13:38:02,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +12: [2023-05-25 13:38:02,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +19: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +27: [2023-05-25 13:38:02,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +25: [2023-05-25 13:38:02,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +27: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +31: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +19: [2023-05-25 13:38:02,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +29: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +29: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +30: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +16: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +16: [2023-05-25 13:38:02,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +25: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +11: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:02,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +30: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +25: [2023-05-25 13:38:02,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +20: [2023-05-25 13:38:02,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +31: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +25: [2023-05-25 13:38:02,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +31: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +28: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +15: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +26: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +26: [2023-05-25 13:38:02,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +15: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +24: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. +24: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +31: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +28: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +22: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +22: [2023-05-25 13:38:02,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +29: [2023-05-25 13:38:02,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +21: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +28: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt... +21: [2023-05-25 13:38:02,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_00-model_states.pt. +28: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +16: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +18: [2023-05-25 13:38:02,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +30: [2023-05-25 13:38:02,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +15: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +22: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +15: [2023-05-25 13:38:02,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +22: [2023-05-25 13:38:02,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +26: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +23: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +26: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +23: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +22: [2023-05-25 13:38:02,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +12: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +19: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +19: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +28: [2023-05-25 13:38:02,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +17: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_01-model_states.pt. +11: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +14: [2023-05-25 13:38:02,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +17: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +19: [2023-05-25 13:38:02,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +15: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +22: [2023-05-25 13:38:02,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +22: [2023-05-25 13:38:02,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +17: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +19: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +23: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +18: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +15: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:02,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +16: [2023-05-25 13:38:02,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +16: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +10: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +18: [2023-05-25 13:38:02,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +16: [2023-05-25 13:38:02,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +17: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +15: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +10: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +15: [2023-05-25 13:38:02,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +18: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +22: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +14: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +19: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +14: [2023-05-25 13:38:02,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +23: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +23: [2023-05-25 13:38:02,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +19: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +21: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +10: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +21: [2023-05-25 13:38:02,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +22: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +11: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +12: [2023-05-25 13:38:02,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 8: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +23: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +11: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +19: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +19: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +18: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +23: [2023-05-25 13:38:02,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +22: [2023-05-25 13:38:02,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +10: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +13: [2023-05-25 13:38:02,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +22: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +12: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +16: [2023-05-25 13:38:02,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_01-model_states.pt. +16: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +16: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +17: [2023-05-25 13:38:02,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +19: [2023-05-25 13:38:02,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +18: [2023-05-25 13:38:02,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +23: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +23: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +21: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +18: [2023-05-25 13:38:02,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +10: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +23: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt... +10: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +23: [2023-05-25 13:38:02,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +16: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +17: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +22: [2023-05-25 13:38:02,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +22: [2023-05-25 13:38:02,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +23: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +21: [2023-05-25 13:38:02,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:02,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. +20: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +16: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +13: [2023-05-25 13:38:02,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +17: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +18: [2023-05-25 13:38:02,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_29-model_02-model_states.pt. +20: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +16: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +22: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +12: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +12: [2023-05-25 13:38:02,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +20: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +10: [2023-05-25 13:38:02,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +20: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +11: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +11: [2023-05-25 13:38:02,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +10: [2023-05-25 13:38:02,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +17: [2023-05-25 13:38:02,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +17: [2023-05-25 13:38:02,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +18: [2023-05-25 13:38:02,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +20: [2023-05-25 13:38:02,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt... +14: [2023-05-25 13:38:02,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +17: [2023-05-25 13:38:02,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +10: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +21: [2023-05-25 13:38:02,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +21: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +17: [2023-05-25 13:38:02,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +21: [2023-05-25 13:38:02,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +10: [2023-05-25 13:38:02,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +10: [2023-05-25 13:38:02,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +15: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +18: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +15: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +13: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +16: [2023-05-25 13:38:02,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +11: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +12: [2023-05-25 13:38:02,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +12: [2023-05-25 13:38:02,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +16: [2023-05-25 13:38:02,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +16: [2023-05-25 13:38:02,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +13: [2023-05-25 13:38:02,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +16: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +13: [2023-05-25 13:38:02,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt... +18: [2023-05-25 13:38:02,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +15: [2023-05-25 13:38:02,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +15: [2023-05-25 13:38:02,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +18: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +13: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +20: [2023-05-25 13:38:02,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +12: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +20: [2023-05-25 13:38:02,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt... +20: [2023-05-25 13:38:02,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +14: [2023-05-25 13:38:02,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +14: [2023-05-25 13:38:02,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +18: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_00-model_states.pt. +20: [2023-05-25 13:38:02,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... +20: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +14: [2023-05-25 13:38:02,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +18: [2023-05-25 13:38:02,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_03-model_states.pt. +11: [2023-05-25 13:38:02,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +13: [2023-05-25 13:38:02,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +10: [2023-05-25 13:38:02,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +10: [2023-05-25 13:38:02,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +10: [2023-05-25 13:38:02,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +11: [2023-05-25 13:38:02,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +11: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +12: [2023-05-25 13:38:02,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +15: [2023-05-25 13:38:02,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +27: [2023-05-25 13:38:02,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +27: [2023-05-25 13:38:02,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +15: [2023-05-25 13:38:02,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 9: [2023-05-25 13:38:02,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +12: [2023-05-25 13:38:02,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 4: [2023-05-25 13:38:02,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +13: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +13: [2023-05-25 13:38:02,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +27: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +13: [2023-05-25 13:38:02,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... +24: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +24: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +13: [2023-05-25 13:38:02,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +13: [2023-05-25 13:38:02,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +26: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +26: [2023-05-25 13:38:02,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +24: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +24: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +29: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +29: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +27: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +26: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 6: [2023-05-25 13:38:02,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +24: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +13: [2023-05-25 13:38:02,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 7: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +13: [2023-05-25 13:38:02,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 7: [2023-05-25 13:38:02,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. +26: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +26: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +25: [2023-05-25 13:38:02,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +13: [2023-05-25 13:38:02,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +28: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +13: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +25: [2023-05-25 13:38:02,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +25: [2023-05-25 13:38:02,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +29: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +26: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +29: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +31: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +31: [2023-05-25 13:38:02,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 1: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 1: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 2: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +26: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +26: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +31: [2023-05-25 13:38:02,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. +29: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_02-model_states.pt. + 6: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 2: [2023-05-25 13:38:02,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +30: [2023-05-25 13:38:02,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +28: [2023-05-25 13:38:02,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +26: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +28: [2023-05-25 13:38:02,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. +28: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +31: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 0: [2023-05-25 13:38:02,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +31: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +25: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +30: [2023-05-25 13:38:02,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +31: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +30: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_03-model_states.pt. +25: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +24: [2023-05-25 13:38:02,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +21: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +31: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +21: [2023-05-25 13:38:02,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +30: [2023-05-25 13:38:02,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +23: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 5: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 4: [2023-05-25 13:38:02,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +28: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +29: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 1: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +24: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +29: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +28: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 4: [2023-05-25 13:38:02,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +18: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +18: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +21: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +30: [2023-05-25 13:38:02,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +29: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +15: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +21: [2023-05-25 13:38:02,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +15: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +14: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +14: [2023-05-25 13:38:02,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +23: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +29: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 3: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +12: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +12: [2023-05-25 13:38:02,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +23: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 5: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +29: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +29: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 4: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +10: [2023-05-25 13:38:02,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 7: [2023-05-25 13:38:02,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:02,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +29: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +10: [2023-05-25 13:38:02,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +22: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +19: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 7: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... + 7: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +18: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +14: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +18: [2023-05-25 13:38:02,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +24: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 6: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +28: [2023-05-25 13:38:02,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +15: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +28: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +24: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +12: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +12: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +25: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +15: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt... +25: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +11: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 7: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... + 2: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +31: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +25: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +20: [2023-05-25 13:38:02,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +20: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +31: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +24: [2023-05-25 13:38:02,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +11: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +31: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +10: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:02,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. +24: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +10: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +27: [2023-05-25 13:38:02,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +10: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +31: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +22: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +27: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. +10: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +27: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_41-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +28: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt... +28: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 0: [2023-05-25 13:38:02,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +26: [2023-05-25 13:38:02,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +28: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +19: [2023-05-25 13:38:02,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +26: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +26: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +19: [2023-05-25 13:38:02,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +30: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +26: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +20: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +30: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +26: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +26: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +17: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +17: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +20: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +28: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +30: [2023-05-25 13:38:02,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt... +14: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +28: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_07-model_03-model_states.pt. +28: [2023-05-25 13:38:02,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:02,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +28: [2023-05-25 13:38:02,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt... +11: [2023-05-25 13:38:02,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +11: [2023-05-25 13:38:02,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:02,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +21: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +12: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +14: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +15: [2023-05-25 13:38:02,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +29: [2023-05-25 13:38:02,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. + 9: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +12: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +14: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +25: [2023-05-25 13:38:02,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +14: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +27: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +24: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +24: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +15: [2023-05-25 13:38:02,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +17: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +31: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +27: [2023-05-25 13:38:02,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt... +14: [2023-05-25 13:38:02,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +12: [2023-05-25 13:38:02,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +28: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +15: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +29: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +15: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +29: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +14: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +25: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt... +17: [2023-05-25 13:38:02,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +31: [2023-05-25 13:38:02,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +10: [2023-05-25 13:38:02,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +12: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:02,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +21: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_01-model_states.pt. +15: [2023-05-25 13:38:02,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +15: [2023-05-25 13:38:02,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +31: [2023-05-25 13:38:02,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:02,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +11: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +10: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +26: [2023-05-25 13:38:02,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +25: [2023-05-25 13:38:02,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +28: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:02,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +28: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +28: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +27: [2023-05-25 13:38:02,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +11: [2023-05-25 13:38:02,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +13: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_01-model_states.pt. +23: [2023-05-25 13:38:02,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +28: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +28: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +26: [2023-05-25 13:38:02,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +19: [2023-05-25 13:38:02,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +19: [2023-05-25 13:38:02,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +30: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +20: [2023-05-25 13:38:02,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +20: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +26: [2023-05-25 13:38:02,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_19-model_02-model_states.pt. + 3: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_00-model_states.pt. +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +21: [2023-05-25 13:38:02,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +27: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt... +28: [2023-05-25 13:38:02,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:02,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +26: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:02,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +27: [2023-05-25 13:38:02,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +27: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_00-model_states.pt. +27: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +21: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +21: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +11: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +27: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt... +11: [2023-05-25 13:38:02,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +11: [2023-05-25 13:38:02,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. + 9: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +15: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt... +23: [2023-05-25 13:38:02,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +10: [2023-05-25 13:38:02,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +15: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... + 9: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +12: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +20: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:02,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +12: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +21: [2023-05-25 13:38:02,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +20: [2023-05-25 13:38:02,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +21: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +17: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +17: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +13: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +16: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +20: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:02,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +13: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt... +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +16: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... + 8: [2023-05-25 13:38:02,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +16: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:02,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +20: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +20: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. + 8: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_00-model_states.pt. +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +19: [2023-05-25 13:38:02,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +12: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +19: [2023-05-25 13:38:02,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +11: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt... +22: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +11: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +22: [2023-05-25 13:38:02,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +16: [2023-05-25 13:38:02,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +13: [2023-05-25 13:38:02,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +17: [2023-05-25 13:38:02,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +20: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +16: [2023-05-25 13:38:02,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_02-model_states.pt. +23: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +12: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:02,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +18: [2023-05-25 13:38:02,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt... +14: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +18: [2023-05-25 13:38:02,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +13: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +11: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +19: [2023-05-25 13:38:02,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:02,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +14: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:02,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +14: [2023-05-25 13:38:02,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +10: [2023-05-25 13:38:02,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +18: [2023-05-25 13:38:02,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +19: [2023-05-25 13:38:02,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +14: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +16: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +23: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:02,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 6: [2023-05-25 13:38:02,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +14: [2023-05-25 13:38:02,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:02,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:02,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +10: [2023-05-25 13:38:02,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +23: [2023-05-25 13:38:02,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +17: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +13: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +23: [2023-05-25 13:38:02,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +22: [2023-05-25 13:38:02,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +22: [2023-05-25 13:38:02,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +13: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +17: [2023-05-25 13:38:02,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +19: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:02,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... + 9: [2023-05-25 13:38:02,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:02,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:02,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +20: [2023-05-25 13:38:02,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:02,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +21: [2023-05-25 13:38:02,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +14: [2023-05-25 13:38:02,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +22: [2023-05-25 13:38:02,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +22: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +14: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +17: [2023-05-25 13:38:02,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:02,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +15: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +15: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 8: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +10: [2023-05-25 13:38:02,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 8: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +10: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +14: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 3: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 8: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +14: [2023-05-25 13:38:02,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 3: [2023-05-25 13:38:02,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:02,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +18: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt... +21: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +22: [2023-05-25 13:38:02,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +10: [2023-05-25 13:38:02,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +22: [2023-05-25 13:38:02,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:02,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +18: [2023-05-25 13:38:02,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +15: [2023-05-25 13:38:02,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:02,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +18: [2023-05-25 13:38:02,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 9: [2023-05-25 13:38:02,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +12: [2023-05-25 13:38:02,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +12: [2023-05-25 13:38:02,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 9: [2023-05-25 13:38:02,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 9: [2023-05-25 13:38:02,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 1: [2023-05-25 13:38:02,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 6: [2023-05-25 13:38:02,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:02,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +18: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +12: [2023-05-25 13:38:02,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 1: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +22: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +15: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:02,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +21: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +11: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. + 1: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_03-model_states.pt. +15: [2023-05-25 13:38:02,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 1: [2023-05-25 13:38:02,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:02,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +22: [2023-05-25 13:38:02,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 0: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +22: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 4: [2023-05-25 13:38:02,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +15: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +22: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 7: [2023-05-25 13:38:02,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +31: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +19: [2023-05-25 13:38:02,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +19: [2023-05-25 13:38:02,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:02,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +17: [2023-05-25 13:38:02,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +16: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 4: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:02,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +23: [2023-05-25 13:38:02,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. + 3: [2023-05-25 13:38:02,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:02,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +20: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_30-model_03-model_states.pt. +18: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:02,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:02,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +18: [2023-05-25 13:38:02,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +18: [2023-05-25 13:38:02,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +31: [2023-05-25 13:38:02,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:02,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +18: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +19: [2023-05-25 13:38:02,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +19: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. +16: [2023-05-25 13:38:02,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +17: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... +23: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 2: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +12: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +16: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 6: [2023-05-25 13:38:02,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 5: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +12: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 2: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +20: [2023-05-25 13:38:02,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 2: [2023-05-25 13:38:02,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +20: [2023-05-25 13:38:02,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt... + 3: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 3: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_01-model_states.pt. + 5: [2023-05-25 13:38:02,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +11: [2023-05-25 13:38:02,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +11: [2023-05-25 13:38:03,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:03,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... +11: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +19: [2023-05-25 13:38:03,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +26: [2023-05-25 13:38:03,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +23: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +16: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +26: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +16: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. +19: [2023-05-25 13:38:03,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +23: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +23: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +16: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +17: [2023-05-25 13:38:03,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +20: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +13: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +13: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt... +14: [2023-05-25 13:38:03,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +14: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:03,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +28: [2023-05-25 13:38:03,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +28: [2023-05-25 13:38:03,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +26: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +24: [2023-05-25 13:38:03,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +24: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +29: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +29: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +13: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +13: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +30: [2023-05-25 13:38:03,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +30: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... +11: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +28: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +11: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +10: [2023-05-25 13:38:03,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +29: [2023-05-25 13:38:03,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +24: [2023-05-25 13:38:03,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:03,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +26: [2023-05-25 13:38:03,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +25: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +24: [2023-05-25 13:38:03,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +25: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +10: [2023-05-25 13:38:03,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +10: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +11: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +30: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +31: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +31: [2023-05-25 13:38:03,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +11: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +12: [2023-05-25 13:38:03,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +28: [2023-05-25 13:38:03,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +28: [2023-05-25 13:38:03,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +26: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +15: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +24: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +24: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +13: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:03,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +26: [2023-05-25 13:38:03,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +13: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +13: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +26: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +25: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +31: [2023-05-25 13:38:03,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +12: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +12: [2023-05-25 13:38:03,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +31: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +10: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +29: [2023-05-25 13:38:03,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +29: [2023-05-25 13:38:03,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +15: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +15: [2023-05-25 13:38:03,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +24: [2023-05-25 13:38:03,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +17: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +24: [2023-05-25 13:38:03,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +27: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:03,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +22: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +14: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_20-model_02-model_states.pt. +22: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +11: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +17: [2023-05-25 13:38:03,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +16: [2023-05-25 13:38:03,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +27: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +10: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +27: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +22: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +22: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +30: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +30: [2023-05-25 13:38:03,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +29: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +28: [2023-05-25 13:38:03,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +10: [2023-05-25 13:38:03,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +11: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +26: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +23: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +26: [2023-05-25 13:38:03,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +26: [2023-05-25 13:38:03,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +31: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +12: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:03,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +15: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +31: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +16: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +31: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +11: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +11: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +14: [2023-05-25 13:38:03,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +14: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt... +21: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +15: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +27: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +22: [2023-05-25 13:38:03,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +17: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_02-model_states.pt. +30: [2023-05-25 13:38:03,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +29: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +16: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +22: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +29: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +20: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +29: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +31: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +20: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +23: [2023-05-25 13:38:03,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +11: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +12: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +31: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +28: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +26: [2023-05-25 13:38:03,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +26: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +20: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +23: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +20: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +31: [2023-05-25 13:38:03,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:03,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +11: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:03,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +10: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +21: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +10: [2023-05-25 13:38:03,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +15: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +28: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +28: [2023-05-25 13:38:03,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +25: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +15: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +25: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +29: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +15: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +15: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +24: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +19: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +25: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +24: [2023-05-25 13:38:03,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +17: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +18: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +17: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +24: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +20: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_08-model_03-model_states.pt. +28: [2023-05-25 13:38:03,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +11: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +20: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +22: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +19: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. +22: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +26: [2023-05-25 13:38:03,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +26: [2023-05-25 13:38:03,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +10: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +28: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +11: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +18: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +31: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +22: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +28: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +10: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +21: [2023-05-25 13:38:03,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +14: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +18: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_02-model_states.pt. +25: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +21: [2023-05-25 13:38:03,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +14: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_00-model_states.pt. +30: [2023-05-25 13:38:03,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +28: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +30: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +14: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +25: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +14: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt... +29: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +28: [2023-05-25 13:38:03,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +29: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +31: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +29: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +22: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +13: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +17: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +30: [2023-05-25 13:38:03,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +30: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_03-model_states.pt. +27: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. +23: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +18: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +22: [2023-05-25 13:38:03,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +30: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +16: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt... +12: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +26: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +26: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +23: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +25: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +31: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +23: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +25: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +13: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_01-model_states.pt. +16: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +22: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +19: [2023-05-25 13:38:03,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +21: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +18: [2023-05-25 13:38:03,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +21: [2023-05-25 13:38:03,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt... +16: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +16: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +21: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +25: [2023-05-25 13:38:03,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +25: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +12: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +30: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +25: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +24: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +24: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +18: [2023-05-25 13:38:03,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +27: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +12: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +12: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +23: [2023-05-25 13:38:03,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:03,157] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +20: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +13: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +22: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +22: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +20: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +28: [2023-05-25 13:38:03,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_42-model_02-model_states.pt. +20: [2023-05-25 13:38:03,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:03,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +28: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +19: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +15: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +19: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +15: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +20: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +20: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +23: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +17: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +26: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +12: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +14: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +19: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +28: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +17: [2023-05-25 13:38:03,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +26: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +19: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +23: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +11: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +11: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +25: [2023-05-25 13:38:03,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_00-model_states.pt. +17: [2023-05-25 13:38:03,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +24: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:03,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt... +17: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +22: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +26: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +19: [2023-05-25 13:38:03,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +22: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +27: [2023-05-25 13:38:03,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +29: [2023-05-25 13:38:03,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +29: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +19: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +27: [2023-05-25 13:38:03,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +21: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +18: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +18: [2023-05-25 13:38:03,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +21: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt... +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +19: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +30: [2023-05-25 13:38:03,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +16: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +19: [2023-05-25 13:38:03,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt... +27: [2023-05-25 13:38:03,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +21: [2023-05-25 13:38:03,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +13: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +13: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:03,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +16: [2023-05-25 13:38:03,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt... +27: [2023-05-25 13:38:03,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +12: [2023-05-25 13:38:03,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +30: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +18: [2023-05-25 13:38:03,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +30: [2023-05-25 13:38:03,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +30: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt... +27: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt... +15: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +11: [2023-05-25 13:38:03,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +16: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +10: [2023-05-25 13:38:03,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +14: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +29: [2023-05-25 13:38:03,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +29: [2023-05-25 13:38:03,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:03,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +17: [2023-05-25 13:38:03,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +27: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt... +18: [2023-05-25 13:38:03,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +19: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:03,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +15: [2023-05-25 13:38:03,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +11: [2023-05-25 13:38:03,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +14: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_00-model_states.pt. +13: [2023-05-25 13:38:03,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +19: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +11: [2023-05-25 13:38:03,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +15: [2023-05-25 13:38:03,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt... +10: [2023-05-25 13:38:03,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +13: [2023-05-25 13:38:03,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +14: [2023-05-25 13:38:03,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +10: [2023-05-25 13:38:03,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +15: [2023-05-25 13:38:03,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +23: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +13: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +10: [2023-05-25 13:38:03,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:03,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +13: [2023-05-25 13:38:03,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +13: [2023-05-25 13:38:03,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +23: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,277] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +14: [2023-05-25 13:38:03,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +14: [2023-05-25 13:38:03,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +12: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +12: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +18: [2023-05-25 13:38:03,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +18: [2023-05-25 13:38:03,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +17: [2023-05-25 13:38:03,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +17: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +16: [2023-05-25 13:38:03,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:03,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +12: [2023-05-25 13:38:03,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +23: [2023-05-25 13:38:03,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +23: [2023-05-25 13:38:03,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +18: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +17: [2023-05-25 13:38:03,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +17: [2023-05-25 13:38:03,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +23: [2023-05-25 13:38:03,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +29: [2023-05-25 13:38:03,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +29: [2023-05-25 13:38:03,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +20: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +16: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +20: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +23: [2023-05-25 13:38:03,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +16: [2023-05-25 13:38:03,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +21: [2023-05-25 13:38:03,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +21: [2023-05-25 13:38:03,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +21: [2023-05-25 13:38:03,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +21: [2023-05-25 13:38:03,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +19: [2023-05-25 13:38:03,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +29: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +22: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +20: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +22: [2023-05-25 13:38:03,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_31-model_03-model_states.pt. +11: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. +20: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +11: [2023-05-25 13:38:03,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +19: [2023-05-25 13:38:03,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +18: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +17: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +12: [2023-05-25 13:38:03,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +19: [2023-05-25 13:38:03,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +22: [2023-05-25 13:38:03,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +12: [2023-05-25 13:38:03,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +22: [2023-05-25 13:38:03,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt... +30: [2023-05-25 13:38:03,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +30: [2023-05-25 13:38:03,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +17: [2023-05-25 13:38:03,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +11: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +16: [2023-05-25 13:38:03,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +17: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:03,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +18: [2023-05-25 13:38:03,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +20: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +16: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +16: [2023-05-25 13:38:03,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +31: [2023-05-25 13:38:03,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +24: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_01-model_states.pt. +28: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +16: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +30: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +20: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +30: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +22: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +31: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +26: [2023-05-25 13:38:03,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +24: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +19: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. +26: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:03,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +24: [2023-05-25 13:38:03,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +16: [2023-05-25 13:38:03,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +19: [2023-05-25 13:38:03,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... +28: [2023-05-25 13:38:03,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +19: [2023-05-25 13:38:03,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +22: [2023-05-25 13:38:03,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:03,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +14: [2023-05-25 13:38:03,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 5: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 5: [2023-05-25 13:38:03,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +26: [2023-05-25 13:38:03,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... +10: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +10: [2023-05-25 13:38:03,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +31: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +16: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +11: [2023-05-25 13:38:03,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +23: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +23: [2023-05-25 13:38:03,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:03,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:03,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +13: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +12: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +13: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +14: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +14: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +10: [2023-05-25 13:38:03,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +10: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +24: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +24: [2023-05-25 13:38:03,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:03,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +23: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +18: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +18: [2023-05-25 13:38:03,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +11: [2023-05-25 13:38:03,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +11: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +12: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +12: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt... +26: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +26: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 9: [2023-05-25 13:38:03,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +13: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +13: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +24: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +24: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +17: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:03,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +29: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +29: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +15: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +28: [2023-05-25 13:38:03,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +25: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +28: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +27: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +17: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +18: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +10: [2023-05-25 13:38:03,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +27: [2023-05-25 13:38:03,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +26: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +28: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +14: [2023-05-25 13:38:03,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +30: [2023-05-25 13:38:03,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +30: [2023-05-25 13:38:03,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +28: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +14: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +31: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_03-model_states.pt. +14: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +10: [2023-05-25 13:38:03,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +31: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +19: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +31: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +28: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +31: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +31: [2023-05-25 13:38:03,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +27: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +24: [2023-05-25 13:38:03,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +12: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +12: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +24: [2023-05-25 13:38:03,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +27: [2023-05-25 13:38:03,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +11: [2023-05-25 13:38:03,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +29: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +29: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +11: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +13: [2023-05-25 13:38:03,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +30: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +25: [2023-05-25 13:38:03,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_03-model_states.pt. +15: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +13: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +25: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +25: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +14: [2023-05-25 13:38:03,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +14: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +31: [2023-05-25 13:38:03,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +31: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +29: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +21: [2023-05-25 13:38:03,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +21: [2023-05-25 13:38:03,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +20: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +20: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +18: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +12: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +18: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +12: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +19: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +26: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +29: [2023-05-25 13:38:03,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +23: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +23: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +24: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. +24: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_09-model_02-model_states.pt. + 3: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +15: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +25: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +22: [2023-05-25 13:38:03,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +28: [2023-05-25 13:38:03,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +10: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +10: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +15: [2023-05-25 13:38:03,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +25: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +11: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +15: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +14: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +19: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +21: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +19: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +16: [2023-05-25 13:38:03,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +25: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +11: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +16: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +21: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +25: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +29: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +12: [2023-05-25 13:38:03,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +31: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +26: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +23: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... +26: [2023-05-25 13:38:03,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +22: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 0: [2023-05-25 13:38:03,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +26: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +28: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +10: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +17: [2023-05-25 13:38:03,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +28: [2023-05-25 13:38:03,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +12: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +12: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +23: [2023-05-25 13:38:03,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +16: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +21: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +20: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. +25: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +27: [2023-05-25 13:38:03,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:03,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +27: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +31: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_21-model_02-model_states.pt. +31: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt... +28: [2023-05-25 13:38:03,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +27: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +17: [2023-05-25 13:38:03,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +28: [2023-05-25 13:38:03,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +28: [2023-05-25 13:38:03,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_01-model_states.pt. +19: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +29: [2023-05-25 13:38:03,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +21: [2023-05-25 13:38:03,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +19: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +31: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +19: [2023-05-25 13:38:03,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +31: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +29: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +17: [2023-05-25 13:38:03,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +29: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt... +29: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt... +20: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,477] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +29: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +13: [2023-05-25 13:38:03,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +28: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt... +28: [2023-05-25 13:38:03,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt... + 2: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. +13: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +31: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt... +13: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +25: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +16: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt... + 8: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +25: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +21: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +30: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +16: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +25: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +22: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_01-model_states.pt. +28: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:03,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +25: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +17: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt... +24: [2023-05-25 13:38:03,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt... +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +31: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +12: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +24: [2023-05-25 13:38:03,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +31: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +22: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +27: [2023-05-25 13:38:03,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +23: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +23: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +24: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +12: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +17: [2023-05-25 13:38:03,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:03,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +23: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +22: [2023-05-25 13:38:03,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +30: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +16: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +16: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +25: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +12: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +12: [2023-05-25 13:38:03,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +21: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +20: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +25: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +22: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +19: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +12: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +25: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +26: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +20: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +16: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +21: [2023-05-25 13:38:03,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +17: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +25: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +18: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +18: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +12: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +27: [2023-05-25 13:38:03,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +11: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +13: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_01-model_states.pt. +17: [2023-05-25 13:38:03,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +26: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +27: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +26: [2023-05-25 13:38:03,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +23: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +21: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +27: [2023-05-25 13:38:03,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +28: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +27: [2023-05-25 13:38:03,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:03,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +20: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt... +11: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +27: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +11: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +15: [2023-05-25 13:38:03,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +15: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +15: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_00-model_states.pt. +27: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +13: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +24: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +24: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +13: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:03,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +26: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt... +30: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +26: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt... +20: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +28: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +26: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt... +27: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt... +28: [2023-05-25 13:38:03,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt... +20: [2023-05-25 13:38:03,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +30: [2023-05-25 13:38:03,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +22: [2023-05-25 13:38:03,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +18: [2023-05-25 13:38:03,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +20: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +13: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +24: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +22: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +30: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +30: [2023-05-25 13:38:03,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +20: [2023-05-25 13:38:03,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +30: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +24: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +22: [2023-05-25 13:38:03,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt... +30: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +27: [2023-05-25 13:38:03,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +10: [2023-05-25 13:38:03,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +24: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +10: [2023-05-25 13:38:03,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +11: [2023-05-25 13:38:03,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +30: [2023-05-25 13:38:03,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +13: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +15: [2023-05-25 13:38:03,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +27: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +24: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt... +24: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt... +13: [2023-05-25 13:38:03,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt... +30: [2023-05-25 13:38:03,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt... +30: [2023-05-25 13:38:03,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt... +15: [2023-05-25 13:38:03,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +14: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +11: [2023-05-25 13:38:03,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +14: [2023-05-25 13:38:03,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +14: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +12: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +15: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +15: [2023-05-25 13:38:03,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +12: [2023-05-25 13:38:03,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:03,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. +29: [2023-05-25 13:38:03,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_43-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +11: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +11: [2023-05-25 13:38:03,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +13: [2023-05-25 13:38:03,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +29: [2023-05-25 13:38:03,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... +29: [2023-05-25 13:38:03,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +14: [2023-05-25 13:38:03,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +22: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +17: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +22: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +17: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +11: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +23: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:03,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +20: [2023-05-25 13:38:03,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +20: [2023-05-25 13:38:03,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +14: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +10: [2023-05-25 13:38:03,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +10: [2023-05-25 13:38:03,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_03-model_states.pt. +16: [2023-05-25 13:38:03,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +16: [2023-05-25 13:38:03,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +18: [2023-05-25 13:38:03,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +12: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +21: [2023-05-25 13:38:03,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +21: [2023-05-25 13:38:03,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_32-model_03-model_states.pt. +23: [2023-05-25 13:38:03,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +23: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +20: [2023-05-25 13:38:03,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +19: [2023-05-25 13:38:03,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +17: [2023-05-25 13:38:03,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +10: [2023-05-25 13:38:03,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +29: [2023-05-25 13:38:03,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +10: [2023-05-25 13:38:03,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +16: [2023-05-25 13:38:03,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +22: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +16: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +29: [2023-05-25 13:38:03,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_00-model_states.pt. +18: [2023-05-25 13:38:03,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +18: [2023-05-25 13:38:03,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... +21: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +21: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +11: [2023-05-25 13:38:03,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +13: [2023-05-25 13:38:03,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +10: [2023-05-25 13:38:03,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +13: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 2: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +13: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +13: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +29: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... +29: [2023-05-25 13:38:03,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +11: [2023-05-25 13:38:03,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +11: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +23: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +17: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +23: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +23: [2023-05-25 13:38:03,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +20: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:03,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +17: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +16: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +10: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +19: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +20: [2023-05-25 13:38:03,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +19: [2023-05-25 13:38:03,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +22: [2023-05-25 13:38:03,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +20: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +19: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +10: [2023-05-25 13:38:03,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt... +16: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +17: [2023-05-25 13:38:03,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +16: [2023-05-25 13:38:03,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +16: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. +21: [2023-05-25 13:38:03,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +22: [2023-05-25 13:38:03,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +21: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +22: [2023-05-25 13:38:03,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +21: [2023-05-25 13:38:03,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +18: [2023-05-25 13:38:03,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_00-model_states.pt. +31: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +31: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +21: [2023-05-25 13:38:03,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... +18: [2023-05-25 13:38:03,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 6: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 4: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 5: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_03-model_states.pt. + 7: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +19: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +19: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +28: [2023-05-25 13:38:03,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +19: [2023-05-25 13:38:03,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_02-model_states.pt. +28: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +31: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +31: [2023-05-25 13:38:03,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 3: [2023-05-25 13:38:03,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 3: [2023-05-25 13:38:03,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +27: [2023-05-25 13:38:03,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +26: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +31: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +26: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +31: [2023-05-25 13:38:03,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +31: [2023-05-25 13:38:03,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +25: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +25: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +29: [2023-05-25 13:38:03,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +29: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +28: [2023-05-25 13:38:03,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +26: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +28: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 5: [2023-05-25 13:38:03,725] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 0: [2023-05-25 13:38:03,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +24: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +24: [2023-05-25 13:38:03,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +27: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. + 4: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +31: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +27: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +19: [2023-05-25 13:38:03,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +21: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt... + 2: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +31: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt... + 4: [2023-05-25 13:38:03,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt... +28: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt... + 5: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +25: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 2: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +26: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +24: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +26: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 5: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +30: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +29: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +29: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +25: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +29: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 0: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +25: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... + 4: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 7: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +29: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +31: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt... +31: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt... + 6: [2023-05-25 13:38:03,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 7: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +17: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +26: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +28: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +28: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +26: [2023-05-25 13:38:03,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +25: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +17: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +27: [2023-05-25 13:38:03,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +17: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +28: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +28: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +19: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +29: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +26: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 7: [2023-05-25 13:38:03,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +29: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +21: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +21: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +26: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +28: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +14: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +28: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +14: [2023-05-25 13:38:03,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +30: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +26: [2023-05-25 13:38:03,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +19: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +24: [2023-05-25 13:38:03,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +30: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +24: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +25: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +26: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +28: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +28: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +24: [2023-05-25 13:38:03,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 4: [2023-05-25 13:38:03,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 3: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +24: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 2: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 2: [2023-05-25 13:38:03,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +27: [2023-05-25 13:38:03,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +27: [2023-05-25 13:38:03,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt... +29: [2023-05-25 13:38:03,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt... + 0: [2023-05-25 13:38:03,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +27: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +25: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +27: [2023-05-25 13:38:03,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +30: [2023-05-25 13:38:03,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +30: [2023-05-25 13:38:03,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_03-model_states.pt. +25: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. + 0: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +14: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +12: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +12: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +31: [2023-05-25 13:38:03,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +31: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 0: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +24: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt... +24: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt... +12: [2023-05-25 13:38:03,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +25: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt... +25: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt... +12: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +26: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt... + 0: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +26: [2023-05-25 13:38:03,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt... +27: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +27: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_10-model_01-model_states.pt. +29: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +29: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +16: [2023-05-25 13:38:03,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:03,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +29: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +25: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt... +25: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt... +16: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +26: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt... + 1: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +26: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt... +29: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +30: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +24: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt... +24: [2023-05-25 13:38:03,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt... +22: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +30: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +28: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt... +28: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt... +23: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +19: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +14: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt... +14: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +23: [2023-05-25 13:38:03,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +16: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... + 1: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +17: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +14: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +31: [2023-05-25 13:38:03,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +13: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +22: [2023-05-25 13:38:03,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +31: [2023-05-25 13:38:03,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +31: [2023-05-25 13:38:03,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +20: [2023-05-25 13:38:03,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +30: [2023-05-25 13:38:03,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... +30: [2023-05-25 13:38:03,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt... + 6: [2023-05-25 13:38:03,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt... +27: [2023-05-25 13:38:03,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +16: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +27: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +16: [2023-05-25 13:38:03,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +23: [2023-05-25 13:38:03,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +27: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +14: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +27: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +22: [2023-05-25 13:38:03,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +22: [2023-05-25 13:38:03,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +19: [2023-05-25 13:38:03,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:03,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +20: [2023-05-25 13:38:03,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_03-model_states.pt. +20: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +20: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +27: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt... +12: [2023-05-25 13:38:03,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +26: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt... +26: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt... +20: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +12: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +27: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt... +12: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +20: [2023-05-25 13:38:03,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +20: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +18: [2023-05-25 13:38:03,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +29: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt... +29: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt... +30: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt... +30: [2023-05-25 13:38:03,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt... +14: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +15: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +31: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +31: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +12: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +27: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt... +27: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt... + 9: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +14: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +19: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +24: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt... +22: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +24: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt... +22: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +25: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. +19: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +25: [2023-05-25 13:38:03,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_01-model_states.pt. + 1: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +16: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +16: [2023-05-25 13:38:03,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +12: [2023-05-25 13:38:03,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +30: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt... +30: [2023-05-25 13:38:03,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt... + 9: [2023-05-25 13:38:03,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +31: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt... +31: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt... +18: [2023-05-25 13:38:03,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +11: [2023-05-25 13:38:03,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. +16: [2023-05-25 13:38:03,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +30: [2023-05-25 13:38:03,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +30: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +12: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt... +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +13: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +22: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. +10: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +11: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:03,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +22: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +25: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +25: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +12: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:03,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +11: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_02-model_states.pt. +16: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt... +16: [2023-05-25 13:38:03,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +20: [2023-05-25 13:38:03,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +16: [2023-05-25 13:38:03,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +23: [2023-05-25 13:38:03,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:03,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +30: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +23: [2023-05-25 13:38:03,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +30: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +10: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +20: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +13: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +11: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +17: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt... +22: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt... +30: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +30: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +23: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:03,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +23: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +17: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +20: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +18: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +21: [2023-05-25 13:38:03,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:03,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +12: [2023-05-25 13:38:03,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +18: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +13: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt... +18: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +13: [2023-05-25 13:38:03,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt... +20: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +20: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +12: [2023-05-25 13:38:03,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +15: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +20: [2023-05-25 13:38:03,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +12: [2023-05-25 13:38:03,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +12: [2023-05-25 13:38:03,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +18: [2023-05-25 13:38:03,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:03,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +18: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +16: [2023-05-25 13:38:03,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +15: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +16: [2023-05-25 13:38:03,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +11: [2023-05-25 13:38:03,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +28: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt... +28: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt... +10: [2023-05-25 13:38:03,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +16: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:03,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +10: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +16: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +18: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +25: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +25: [2023-05-25 13:38:03,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt... +10: [2023-05-25 13:38:03,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +17: [2023-05-25 13:38:03,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +25: [2023-05-25 13:38:03,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +11: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +25: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_01-model_states.pt. +13: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +13: [2023-05-25 13:38:03,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +10: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +21: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +12: [2023-05-25 13:38:03,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +13: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +13: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +15: [2023-05-25 13:38:03,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +17: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt... +21: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +14: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt... +29: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. +29: [2023-05-25 13:38:03,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_44-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. + 8: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +22: [2023-05-25 13:38:03,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +22: [2023-05-25 13:38:03,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +21: [2023-05-25 13:38:03,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +15: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +20: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +18: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +19: [2023-05-25 13:38:03,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +14: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt... +19: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +15: [2023-05-25 13:38:03,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. +21: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +18: [2023-05-25 13:38:03,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +15: [2023-05-25 13:38:03,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +21: [2023-05-25 13:38:03,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +10: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt... +21: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +21: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +10: [2023-05-25 13:38:03,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt... +12: [2023-05-25 13:38:03,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +21: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +14: [2023-05-25 13:38:03,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +29: [2023-05-25 13:38:03,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt... +29: [2023-05-25 13:38:03,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +22: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +10: [2023-05-25 13:38:03,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +22: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +19: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +20: [2023-05-25 13:38:03,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +29: [2023-05-25 13:38:03,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +19: [2023-05-25 13:38:03,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +13: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +29: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt... +13: [2023-05-25 13:38:03,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +10: [2023-05-25 13:38:03,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +11: [2023-05-25 13:38:03,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_03-model_states.pt. +15: [2023-05-25 13:38:03,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +15: [2023-05-25 13:38:03,899] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +29: [2023-05-25 13:38:03,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. + 9: [2023-05-25 13:38:03,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt... + 9: [2023-05-25 13:38:03,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt... +10: [2023-05-25 13:38:03,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,904] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +10: [2023-05-25 13:38:03,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +29: [2023-05-25 13:38:03,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_46-model_02-model_states.pt. +13: [2023-05-25 13:38:03,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:03,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +30: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt... +10: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,908] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:03,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +14: [2023-05-25 13:38:03,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:03,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +20: [2023-05-25 13:38:03,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +14: [2023-05-25 13:38:03,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 8: [2023-05-25 13:38:03,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:03,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt... +11: [2023-05-25 13:38:03,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt... +14: [2023-05-25 13:38:03,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:03,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +23: [2023-05-25 13:38:03,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +23: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +22: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +22: [2023-05-25 13:38:03,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +20: [2023-05-25 13:38:03,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +30: [2023-05-25 13:38:03,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt... + 9: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +20: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +19: [2023-05-25 13:38:03,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +14: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +14: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 8: [2023-05-25 13:38:03,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +19: [2023-05-25 13:38:03,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +15: [2023-05-25 13:38:03,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +19: [2023-05-25 13:38:03,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 8: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_22-model_02-model_states.pt. + 2: [2023-05-25 13:38:03,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +17: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_01-model_states.pt. +19: [2023-05-25 13:38:03,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +12: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +12: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +11: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:03,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +11: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:03,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 3: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +25: [2023-05-25 13:38:03,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt... +25: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt... +11: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:03,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +23: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +14: [2023-05-25 13:38:03,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +15: [2023-05-25 13:38:03,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +15: [2023-05-25 13:38:03,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 9: [2023-05-25 13:38:03,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +10: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +13: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... +10: [2023-05-25 13:38:03,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 6: [2023-05-25 13:38:03,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt... +10: [2023-05-25 13:38:03,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:03,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +11: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +23: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +10: [2023-05-25 13:38:03,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:03,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +10: [2023-05-25 13:38:03,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt... + 9: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +10: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 4: [2023-05-25 13:38:03,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +15: [2023-05-25 13:38:03,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +15: [2023-05-25 13:38:03,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +29: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt... +29: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt... + 4: [2023-05-25 13:38:03,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +15: [2023-05-25 13:38:03,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +15: [2023-05-25 13:38:03,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 7: [2023-05-25 13:38:03,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 5: [2023-05-25 13:38:03,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. +17: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:03,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... +11: [2023-05-25 13:38:03,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +17: [2023-05-25 13:38:03,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... +17: [2023-05-25 13:38:03,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt... + 1: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 1: [2023-05-25 13:38:03,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 4: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_00-model_states.pt. + 4: [2023-05-25 13:38:03,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +11: [2023-05-25 13:38:03,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt... + 6: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 8: [2023-05-25 13:38:03,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:03,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 9: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:03,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 7: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 7: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 3: [2023-05-25 13:38:03,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:03,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 2: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:03,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:03,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:03,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +13: [2023-05-25 13:38:03,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. +13: [2023-05-25 13:38:03,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_01-model_states.pt. + 6: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +14: [2023-05-25 13:38:03,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 6: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 8: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +14: [2023-05-25 13:38:03,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 8: [2023-05-25 13:38:03,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +17: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +17: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 9: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 9: [2023-05-25 13:38:03,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 7: [2023-05-25 13:38:03,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 0: [2023-05-25 13:38:03,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. +11: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +11: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +13: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. +17: [2023-05-25 13:38:04,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +17: [2023-05-25 13:38:04,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:04,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +19: [2023-05-25 13:38:04,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +22: [2023-05-25 13:38:04,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +22: [2023-05-25 13:38:04,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +20: [2023-05-25 13:38:04,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +20: [2023-05-25 13:38:04,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +21: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +18: [2023-05-25 13:38:04,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +21: [2023-05-25 13:38:04,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +18: [2023-05-25 13:38:04,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:04,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:04,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +19: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +22: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +17: [2023-05-25 13:38:04,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +13: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +13: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +20: [2023-05-25 13:38:04,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +20: [2023-05-25 13:38:04,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:04,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +18: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +21: [2023-05-25 13:38:04,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +17: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +17: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +13: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... +13: [2023-05-25 13:38:04,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +17: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 7: [2023-05-25 13:38:04,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +23: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +23: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 0: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... +19: [2023-05-25 13:38:04,059] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:04,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +19: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. +22: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +23: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +22: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 6: [2023-05-25 13:38:04,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_11-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +23: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +22: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +14: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +19: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +19: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 3: [2023-05-25 13:38:04,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +20: [2023-05-25 13:38:04,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +14: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +18: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +20: [2023-05-25 13:38:04,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +20: [2023-05-25 13:38:04,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +18: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +20: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +18: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +21: [2023-05-25 13:38:04,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +18: [2023-05-25 13:38:04,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +21: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +10: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +10: [2023-05-25 13:38:04,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +29: [2023-05-25 13:38:04,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:04,076] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 232 + 3: [2023-05-25 13:38:04,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 9: [2023-05-25 13:38:04,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt... +14: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +14: [2023-05-25 13:38:04,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +12: [2023-05-25 13:38:04,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +12: [2023-05-25 13:38:04,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +10: [2023-05-25 13:38:04,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt... +23: [2023-05-25 13:38:04,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 4: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 1: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +16: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +12: [2023-05-25 13:38:04,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_33-model_03-model_states.pt. +12: [2023-05-25 13:38:04,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +10: [2023-05-25 13:38:04,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:04,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... +14: [2023-05-25 13:38:04,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +11: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. +16: [2023-05-25 13:38:04,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... + 9: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +14: [2023-05-25 13:38:04,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +14: [2023-05-25 13:38:04,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +14: [2023-05-25 13:38:04,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt... +10: [2023-05-25 13:38:04,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +10: [2023-05-25 13:38:04,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +11: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +11: [2023-05-25 13:38:04,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +12: [2023-05-25 13:38:04,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +12: [2023-05-25 13:38:04,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +12: [2023-05-25 13:38:04,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +22: [2023-05-25 13:38:04,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +12: [2023-05-25 13:38:04,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +13: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +16: [2023-05-25 13:38:04,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +22: [2023-05-25 13:38:04,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +16: [2023-05-25 13:38:04,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +11: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +16: [2023-05-25 13:38:04,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_00-model_states.pt. +11: [2023-05-25 13:38:04,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +16: [2023-05-25 13:38:04,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt... +22: [2023-05-25 13:38:04,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:04,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +11: [2023-05-25 13:38:04,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +11: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +21: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +21: [2023-05-25 13:38:04,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +19: [2023-05-25 13:38:04,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +13: [2023-05-25 13:38:04,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +13: [2023-05-25 13:38:04,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +21: [2023-05-25 13:38:04,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +13: [2023-05-25 13:38:04,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +16: [2023-05-25 13:38:04,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +18: [2023-05-25 13:38:04,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +18: [2023-05-25 13:38:04,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +16: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +15: [2023-05-25 13:38:04,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +23: [2023-05-25 13:38:04,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +21: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +23: [2023-05-25 13:38:04,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +18: [2023-05-25 13:38:04,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +19: [2023-05-25 13:38:04,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +18: [2023-05-25 13:38:04,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +20: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +15: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +23: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +23: [2023-05-25 13:38:04,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_02-model_states.pt. +17: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +17: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +15: [2023-05-25 13:38:04,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +19: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +19: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +17: [2023-05-25 13:38:04,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt... +17: [2023-05-25 13:38:04,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt... +20: [2023-05-25 13:38:04,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +20: [2023-05-25 13:38:04,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +16: [2023-05-25 13:38:04,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +19: [2023-05-25 13:38:04,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_01-model_states.pt. +11: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +11: [2023-05-25 13:38:04,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +17: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +16: [2023-05-25 13:38:04,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. +18: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +15: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +20: [2023-05-25 13:38:04,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +18: [2023-05-25 13:38:04,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +22: [2023-05-25 13:38:04,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +18: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +21: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +20: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +20: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +23: [2023-05-25 13:38:04,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt... +23: [2023-05-25 13:38:04,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt... + 8: [2023-05-25 13:38:04,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_23-model_02-model_states.pt. +18: [2023-05-25 13:38:04,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +21: [2023-05-25 13:38:04,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +18: [2023-05-25 13:38:04,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +17: [2023-05-25 13:38:04,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +17: [2023-05-25 13:38:04,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +17: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +15: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +17: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +25: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt. +22: [2023-05-25 13:38:04,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +25: [2023-05-25 13:38:04,272] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 204 +22: [2023-05-25 13:38:04,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +23: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt... +21: [2023-05-25 13:38:04,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +15: [2023-05-25 13:38:04,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +16: [2023-05-25 13:38:04,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,277] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +20: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +22: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +22: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +17: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +23: [2023-05-25 13:38:04,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +16: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +23: [2023-05-25 13:38:04,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt... +19: [2023-05-25 13:38:04,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +19: [2023-05-25 13:38:04,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +19: [2023-05-25 13:38:04,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt... +12: [2023-05-25 13:38:04,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +12: [2023-05-25 13:38:04,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +12: [2023-05-25 13:38:04,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +12: [2023-05-25 13:38:04,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +22: [2023-05-25 13:38:04,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +15: [2023-05-25 13:38:04,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt... +15: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt... +11: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt... +11: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt... +20: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt... +21: [2023-05-25 13:38:04,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt... + 9: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 9: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 8: [2023-05-25 13:38:04,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +20: [2023-05-25 13:38:04,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt... +16: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt... +18: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt... +18: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt... +21: [2023-05-25 13:38:04,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt... + 8: [2023-05-25 13:38:04,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... + 8: [2023-05-25 13:38:04,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt... +13: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +13: [2023-05-25 13:38:04,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 8: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +10: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. +10: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 9: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt... + 8: [2023-05-25 13:38:04,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt... +22: [2023-05-25 13:38:04,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt... +22: [2023-05-25 13:38:04,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt... + 9: [2023-05-25 13:38:04,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +27: [2023-05-25 13:38:04,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt. +11: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +11: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +13: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +13: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +10: [2023-05-25 13:38:04,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +10: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +14: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +14: [2023-05-25 13:38:04,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +13: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt... +15: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. +15: [2023-05-25 13:38:04,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +13: [2023-05-25 13:38:04,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt... +14: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt... +14: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt... + 6: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +12: [2023-05-25 13:38:04,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt... +12: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt... + 6: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 9: [2023-05-25 13:38:04,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt... + 9: [2023-05-25 13:38:04,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt... +31: [2023-05-25 13:38:04,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:04,346] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 248 +27: [2023-05-25 13:38:04,333] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 220 + 4: [2023-05-25 13:38:04,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 8: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt... + 8: [2023-05-25 13:38:04,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt... + 2: [2023-05-25 13:38:04,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 9: [2023-05-25 13:38:04,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt... + 9: [2023-05-25 13:38:04,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt... + 1: [2023-05-25 13:38:04,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt... +10: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt... + 7: [2023-05-25 13:38:04,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +14: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt... +14: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt... +15: [2023-05-25 13:38:04,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt... +15: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt... + 6: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:04,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 3: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +27: [2023-05-25 13:38:04,374] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 220 + 6: [2023-05-25 13:38:04,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +21: [2023-05-25 13:38:04,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +21: [2023-05-25 13:38:04,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +25: [2023-05-25 13:38:04,386] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 204 + 7: [2023-05-25 13:38:04,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 4: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 9: [2023-05-25 13:38:04,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 9: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +13: [2023-05-25 13:38:04,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt... +13: [2023-05-25 13:38:04,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt... + 1: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... + 1: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +21: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... + 2: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +31: [2023-05-25 13:38:04,394] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 248 +21: [2023-05-25 13:38:04,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... + 3: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +11: [2023-05-25 13:38:04,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt... +11: [2023-05-25 13:38:04,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt... + 5: [2023-05-25 13:38:04,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... + 5: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +27: [2023-05-25 13:38:04,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt. + 9: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt... + 9: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt... + 2: [2023-05-25 13:38:04,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +29: [2023-05-25 13:38:04,411] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 232 + 0: > overriding learning rate value to 0.0002 + 0: > overriding minimum learning rate value to 2e-05 + 0: > overriding warmup iterations value to 0 + 0: > overriding total number of iterations value to 1 + 0: > overriding decay style value to cosine +25: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt. +25: [2023-05-25 13:38:04,414] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 203 + 7: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +11: [2023-05-25 13:38:04,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +11: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... + 7: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... + 0: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... + 4: [2023-05-25 13:38:04,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +27: [2023-05-25 13:38:04,408] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 216 + 7: [2023-05-25 13:38:04,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... +22: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +14: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +14: [2023-05-25 13:38:04,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +21: [2023-05-25 13:38:04,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +27: [2023-05-25 13:38:04,420] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 216 +12: [2023-05-25 13:38:04,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +12: [2023-05-25 13:38:04,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +21: [2023-05-25 13:38:04,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +21: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +21: [2023-05-25 13:38:04,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +22: [2023-05-25 13:38:04,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 1: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. +17: [2023-05-25 13:38:04,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +17: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. +10: [2023-05-25 13:38:04,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +13: [2023-05-25 13:38:04,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +13: [2023-05-25 13:38:04,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +31: [2023-05-25 13:38:04,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:04,459] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 253 + 5: [2023-05-25 13:38:04,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 5: [2023-05-25 13:38:04,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +22: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +17: [2023-05-25 13:38:04,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +28: [2023-05-25 13:38:04,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt. +28: [2023-05-25 13:38:04,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 228 +17: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +30: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:04,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 244 + 4: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +18: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +22: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +22: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +18: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +22: [2023-05-25 13:38:04,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +30: [2023-05-25 13:38:04,475] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 244 +28: [2023-05-25 13:38:04,476] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 228 + 0: [2023-05-25 13:38:04,476] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +23: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +23: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +18: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. +16: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +16: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +18: [2023-05-25 13:38:04,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +20: [2023-05-25 13:38:04,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +23: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +17: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +23: [2023-05-25 13:38:04,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +10: [2023-05-25 13:38:04,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +17: [2023-05-25 13:38:04,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +16: [2023-05-25 13:38:04,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +16: [2023-05-25 13:38:04,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 7: [2023-05-25 13:38:04,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +15: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. +15: [2023-05-25 13:38:04,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +17: [2023-05-25 13:38:04,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +20: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 5: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +14: [2023-05-25 13:38:04,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt... + 1: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 1: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 6: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 8: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 8: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_24-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +17: [2023-05-25 13:38:04,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 5: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 3: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +14: [2023-05-25 13:38:04,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt... + 0: [2023-05-25 13:38:04,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 2: [2023-05-25 13:38:04,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 1: [2023-05-25 13:38:04,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +11: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt... +11: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt... +18: [2023-05-25 13:38:04,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 5: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +18: [2023-05-25 13:38:04,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +18: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 6: [2023-05-25 13:38:04,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +18: [2023-05-25 13:38:04,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 5: [2023-05-25 13:38:04,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +23: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... +16: [2023-05-25 13:38:04,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +23: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +23: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +28: [2023-05-25 13:38:04,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:04,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 226 + 0: [2023-05-25 13:38:04,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... +16: [2023-05-25 13:38:04,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +16: [2023-05-25 13:38:04,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 6: [2023-05-25 13:38:04,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. +20: [2023-05-25 13:38:04,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. + 4: [2023-05-25 13:38:04,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +20: [2023-05-25 13:38:04,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +16: [2023-05-25 13:38:04,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +19: [2023-05-25 13:38:04,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. +20: [2023-05-25 13:38:04,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +20: [2023-05-25 13:38:04,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +19: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_34-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 1: [2023-05-25 13:38:04,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 2: [2023-05-25 13:38:04,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 2: [2023-05-25 13:38:04,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 2: [2023-05-25 13:38:04,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 1: [2023-05-25 13:38:04,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_12-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 6: [2023-05-25 13:38:04,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 4: [2023-05-25 13:38:04,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 4: [2023-05-25 13:38:04,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt... + 3: [2023-05-25 13:38:04,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +19: [2023-05-25 13:38:04,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... +19: [2023-05-25 13:38:04,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt... + 3: [2023-05-25 13:38:04,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +31: [2023-05-25 13:38:04,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:04,550] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 251 + 1: [2023-05-25 13:38:04,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 1: [2023-05-25 13:38:04,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 3: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +10: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt... +10: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt... +12: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt... + 7: [2023-05-25 13:38:04,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 7: [2023-05-25 13:38:04,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt... + 0: [2023-05-25 13:38:04,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt... + 0: [2023-05-25 13:38:04,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 0: [2023-05-25 13:38:04,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 0: [2023-05-25 13:38:04,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +12: [2023-05-25 13:38:04,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_04_optim_states.pt. +12: [2023-05-25 13:38:04,563] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 100 +23: [2023-05-25 13:38:04,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +23: [2023-05-25 13:38:04,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +15: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt... +15: [2023-05-25 13:38:04,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt... +13: [2023-05-25 13:38:04,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt... +13: [2023-05-25 13:38:04,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt... + 6: [2023-05-25 13:38:04,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +19: [2023-05-25 13:38:04,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +29: [2023-05-25 13:38:04,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt. +29: [2023-05-25 13:38:04,574] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 236 +19: [2023-05-25 13:38:04,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_00-model_states.pt. +12: [2023-05-25 13:38:04,576] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 100 +19: [2023-05-25 13:38:04,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... +19: [2023-05-25 13:38:04,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt... + 6: [2023-05-25 13:38:04,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. +16: [2023-05-25 13:38:04,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:04,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... + 7: [2023-05-25 13:38:04,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_00-model_states.pt. + 7: [2023-05-25 13:38:04,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +18: [2023-05-25 13:38:04,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt... +29: [2023-05-25 13:38:04,585] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 236 +29: [2023-05-25 13:38:04,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:04,590] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 237 +18: [2023-05-25 13:38:04,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +27: [2023-05-25 13:38:04,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:04,592] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 221 +28: [2023-05-25 13:38:04,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt. +28: [2023-05-25 13:38:04,603] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 227 +16: [2023-05-25 13:38:04,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +16: [2023-05-25 13:38:04,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt... + 8: [2023-05-25 13:38:04,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt... +17: [2023-05-25 13:38:04,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +24: [2023-05-25 13:38:04,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:04,620] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 194 +17: [2023-05-25 13:38:04,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +24: [2023-05-25 13:38:04,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt. +24: [2023-05-25 13:38:04,622] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 196 +18: [2023-05-25 13:38:04,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +18: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +16: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt... +16: [2023-05-25 13:38:04,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt... +31: [2023-05-25 13:38:04,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt. +31: [2023-05-25 13:38:04,625] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 252 +16: [2023-05-25 13:38:04,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt... +16: [2023-05-25 13:38:04,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +19: [2023-05-25 13:38:04,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:04,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +17: [2023-05-25 13:38:04,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +21: [2023-05-25 13:38:04,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +21: [2023-05-25 13:38:04,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +20: [2023-05-25 13:38:04,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +13: [2023-05-25 13:38:04,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,633] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 108 +20: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +21: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt... +21: [2023-05-25 13:38:04,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt... +20: [2023-05-25 13:38:04,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +20: [2023-05-25 13:38:04,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +31: [2023-05-25 13:38:04,635] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 252 +26: [2023-05-25 13:38:04,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt. +26: [2023-05-25 13:38:04,636] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 208 +24: [2023-05-25 13:38:04,636] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 196 +25: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt. +22: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +25: [2023-05-25 13:38:04,639] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 200 +22: [2023-05-25 13:38:04,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +22: [2023-05-25 13:38:04,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +22: [2023-05-25 13:38:04,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +31: [2023-05-25 13:38:04,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt. +31: [2023-05-25 13:38:04,644] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 254 +30: [2023-05-25 13:38:04,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt. +30: [2023-05-25 13:38:04,646] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 240 +23: [2023-05-25 13:38:04,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt... +23: [2023-05-25 13:38:04,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt... +13: [2023-05-25 13:38:04,647] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 108 +18: [2023-05-25 13:38:04,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt... +18: [2023-05-25 13:38:04,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt... +26: [2023-05-25 13:38:04,648] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 208 +17: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt... +17: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +19: [2023-05-25 13:38:04,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_01-model_states.pt. +25: [2023-05-25 13:38:04,652] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 200 +19: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt... +19: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt... +17: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt... +17: [2023-05-25 13:38:04,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt... +20: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt... +20: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt... +24: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt. +14: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_04_optim_states.pt. +24: [2023-05-25 13:38:04,654] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 192 +18: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt... +18: [2023-05-25 13:38:04,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt... +14: [2023-05-25 13:38:04,654] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 112 +20: [2023-05-25 13:38:04,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt... +20: [2023-05-25 13:38:04,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt... +30: [2023-05-25 13:38:04,658] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 240 +22: [2023-05-25 13:38:04,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt... +22: [2023-05-25 13:38:04,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt... +14: [2023-05-25 13:38:04,666] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 112 +24: [2023-05-25 13:38:04,667] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 192 +31: [2023-05-25 13:38:04,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt. +31: [2023-05-25 13:38:04,668] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 250 +26: [2023-05-25 13:38:04,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:04,674] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 214 +26: [2023-05-25 13:38:04,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:04,677] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 211 +23: [2023-05-25 13:38:04,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +23: [2023-05-25 13:38:04,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_02-model_states.pt. +28: [2023-05-25 13:38:04,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt. +28: [2023-05-25 13:38:04,691] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 231 + 2: [2023-05-25 13:38:04,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +24: [2023-05-25 13:38:04,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:04,697] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 197 + 2: [2023-05-25 13:38:04,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +11: [2023-05-25 13:38:04,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt. +11: [2023-05-25 13:38:04,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 92 +19: [2023-05-25 13:38:04,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt... +19: [2023-05-25 13:38:04,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt... +21: [2023-05-25 13:38:04,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt... +21: [2023-05-25 13:38:04,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt... + 7: [2023-05-25 13:38:04,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 7: [2023-05-25 13:38:04,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +11: [2023-05-25 13:38:04,716] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 92 +22: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt... +22: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt... +26: [2023-05-25 13:38:04,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt. +26: [2023-05-25 13:38:04,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 212 + 3: [2023-05-25 13:38:04,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 3: [2023-05-25 13:38:04,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +26: [2023-05-25 13:38:04,732] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 212 +27: [2023-05-25 13:38:04,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt. +27: [2023-05-25 13:38:04,735] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 217 + 7: [2023-05-25 13:38:04,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt... + 7: [2023-05-25 13:38:04,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt... +25: [2023-05-25 13:38:04,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:04,742] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 201 +31: [2023-05-25 13:38:04,746] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 253 + 6: [2023-05-25 13:38:04,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 6: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 2: [2023-05-25 13:38:04,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 4: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 2: [2023-05-25 13:38:04,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt... + 2: [2023-05-25 13:38:04,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +30: [2023-05-25 13:38:04,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt. +30: [2023-05-25 13:38:04,762] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 246 +25: [2023-05-25 13:38:04,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:04,763] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 202 +28: [2023-05-25 13:38:04,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt. +28: [2023-05-25 13:38:04,764] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 224 +27: [2023-05-25 13:38:04,764] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 217 +25: [2023-05-25 13:38:04,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt. +25: [2023-05-25 13:38:04,767] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 206 +27: [2023-05-25 13:38:04,764] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 221 +21: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +27: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:04,770] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 219 +21: [2023-05-25 13:38:04,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 5: [2023-05-25 13:38:04,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +28: [2023-05-25 13:38:04,776] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 224 + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 0: [2023-05-25 13:38:04,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. + 8: [2023-05-25 13:38:04,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 64 + 3: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +30: [2023-05-25 13:38:04,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:04,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 247 +12: [2023-05-25 13:38:04,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt. +12: [2023-05-25 13:38:04,782] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 96 +29: [2023-05-25 13:38:04,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:04,782] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 235 +23: [2023-05-25 13:38:04,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt... +23: [2023-05-25 13:38:04,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt... +27: [2023-05-25 13:38:04,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt. +27: [2023-05-25 13:38:04,786] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 223 +26: [2023-05-25 13:38:04,789] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 211 +30: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:04,791] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 241 + 2: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt... + 2: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt... + 4: [2023-05-25 13:38:04,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +12: [2023-05-25 13:38:04,794] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 96 + 8: [2023-05-25 13:38:04,795] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 64 + 3: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt... + 3: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_01-model_states.pt. +24: [2023-05-25 13:38:04,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:04,796] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 199 +29: [2023-05-25 13:38:04,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt. +29: [2023-05-25 13:38:04,797] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 233 + 6: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt... + 6: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt... +22: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +22: [2023-05-25 13:38:04,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 3: [2023-05-25 13:38:04,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +30: [2023-05-25 13:38:04,803] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 241 +29: [2023-05-25 13:38:04,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt. +29: [2023-05-25 13:38:04,804] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 239 +29: [2023-05-25 13:38:04,804] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 237 +25: [2023-05-25 13:38:04,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt. +25: [2023-05-25 13:38:04,805] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 207 + 7: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 7: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt... + 3: [2023-05-25 13:38:04,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt... +21: [2023-05-25 13:38:04,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt... +29: [2023-05-25 13:38:04,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:04,807] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 234 +29: [2023-05-25 13:38:04,808] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 233 + 0: [2023-05-25 13:38:04,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt... + 0: [2023-05-25 13:38:04,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt... +27: [2023-05-25 13:38:04,810] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 223 +28: [2023-05-25 13:38:04,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt. +28: [2023-05-25 13:38:04,811] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 230 +24: [2023-05-25 13:38:04,812] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 199 +27: [2023-05-25 13:38:04,810] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 219 +24: [2023-05-25 13:38:04,813] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 197 +21: [2023-05-25 13:38:04,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt... + 0: [2023-05-25 13:38:04,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt... + 0: [2023-05-25 13:38:04,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt... + 4: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 0: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 0: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 4: [2023-05-25 13:38:04,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 6: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +26: [2023-05-25 13:38:04,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_14_optim_states.pt. +26: [2023-05-25 13:38:04,822] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 210 +23: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +23: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 1: [2023-05-25 13:38:04,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +13: [2023-05-25 13:38:04,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt. +13: [2023-05-25 13:38:04,829] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 104 +18: [2023-05-25 13:38:04,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +18: [2023-05-25 13:38:04,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +25: [2023-05-25 13:38:04,831] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 201 +28: [2023-05-25 13:38:04,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:04,837] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 229 +31: [2023-05-25 13:38:04,840] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 250 +24: [2023-05-25 13:38:04,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt. +24: [2023-05-25 13:38:04,841] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 195 +13: [2023-05-25 13:38:04,843] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 104 +31: [2023-05-25 13:38:04,844] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 254 + 5: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +10: [2023-05-25 13:38:04,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,836] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 80 + 5: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 3: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt... + 3: [2023-05-25 13:38:04,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt... + 1: [2023-05-25 13:38:04,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt... + 1: [2023-05-25 13:38:04,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 1: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +16: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +16: [2023-05-25 13:38:04,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +24: [2023-05-25 13:38:04,848] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 194 +28: [2023-05-25 13:38:04,849] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 231 +28: [2023-05-25 13:38:04,851] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 229 +30: [2023-05-25 13:38:04,851] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 246 +19: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt... + 5: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt... + 6: [2023-05-25 13:38:04,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt... + 6: [2023-05-25 13:38:04,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt... +28: [2023-05-25 13:38:04,853] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 227 +15: [2023-05-25 13:38:04,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:04,853] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 124 +10: [2023-05-25 13:38:04,853] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 80 +17: [2023-05-25 13:38:04,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +17: [2023-05-25 13:38:04,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +24: [2023-05-25 13:38:04,855] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 195 + 0: [2023-05-25 13:38:04,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt... + 0: [2023-05-25 13:38:04,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt... + 4: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt... +29: [2023-05-25 13:38:04,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 239 + 4: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt... +19: [2023-05-25 13:38:04,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. + 5: [2023-05-25 13:38:04,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt... + 5: [2023-05-25 13:38:04,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt... +29: [2023-05-25 13:38:04,858] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 235 +28: [2023-05-25 13:38:04,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt. +28: [2023-05-25 13:38:04,860] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 225 +20: [2023-05-25 13:38:04,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +25: [2023-05-25 13:38:04,861] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 203 +25: [2023-05-25 13:38:04,863] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 207 +31: [2023-05-25 13:38:04,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt. +31: [2023-05-25 13:38:04,864] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 249 +15: [2023-05-25 13:38:04,865] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 124 + 7: [2023-05-25 13:38:04,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 7: [2023-05-25 13:38:04,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +20: [2023-05-25 13:38:04,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_35-model_03-model_states.pt. +30: [2023-05-25 13:38:04,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_13_optim_states.pt. +30: [2023-05-25 13:38:04,869] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 245 +28: [2023-05-25 13:38:04,871] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 225 + 2: [2023-05-25 13:38:04,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. + 2: [2023-05-25 13:38:04,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +29: [2023-05-25 13:38:04,875] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 234 + 4: [2023-05-25 13:38:04,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +31: [2023-05-25 13:38:04,876] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 249 +24: [2023-05-25 13:38:04,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt. +24: [2023-05-25 13:38:04,878] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 193 +29: [2023-05-25 13:38:04,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt. +29: [2023-05-25 13:38:04,881] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 238 +25: [2023-05-25 13:38:04,881] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 202 +30: [2023-05-25 13:38:04,881] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 245 + 1: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt... + 1: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt... +24: [2023-05-25 13:38:04,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt. +24: [2023-05-25 13:38:04,884] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 198 +25: [2023-05-25 13:38:04,884] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 206 + 5: [2023-05-25 13:38:04,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt... + 5: [2023-05-25 13:38:04,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt... +24: [2023-05-25 13:38:04,890] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 193 + 4: [2023-05-25 13:38:04,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_02-model_states.pt. +29: [2023-05-25 13:38:04,895] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 238 + 6: [2023-05-25 13:38:04,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. + 6: [2023-05-25 13:38:04,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/layer_13-model_03-model_states.pt. +26: [2023-05-25 13:38:04,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:04,896] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 209 +24: [2023-05-25 13:38:04,896] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 198 +26: [2023-05-25 13:38:04,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt. +26: [2023-05-25 13:38:04,898] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 215 +31: [2023-05-25 13:38:04,899] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 251 +27: [2023-05-25 13:38:04,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:04,900] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 222 +17: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt... +17: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt... +22: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt... +22: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt... +26: [2023-05-25 13:38:04,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt. +26: [2023-05-25 13:38:04,904] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 213 +26: [2023-05-25 13:38:04,904] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 214 +26: [2023-05-25 13:38:04,905] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 210 +28: [2023-05-25 13:38:04,905] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 226 + 7: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt... + 7: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt... +31: [2023-05-25 13:38:04,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt. +31: [2023-05-25 13:38:04,908] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 255 +30: [2023-05-25 13:38:04,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt. +30: [2023-05-25 13:38:04,910] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 243 + 7: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt... + 7: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt... + 2: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt... + 2: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt... +19: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt... +19: [2023-05-25 13:38:04,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt... +27: [2023-05-25 13:38:04,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 222 +26: [2023-05-25 13:38:04,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 209 +26: [2023-05-25 13:38:04,915] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 215 + 1: [2023-05-25 13:38:04,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt... + 1: [2023-05-25 13:38:04,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt... +26: [2023-05-25 13:38:04,918] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 213 +28: [2023-05-25 13:38:04,922] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 230 +31: [2023-05-25 13:38:04,922] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 255 +18: [2023-05-25 13:38:04,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt... +18: [2023-05-25 13:38:04,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt... +30: [2023-05-25 13:38:04,924] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 247 +30: [2023-05-25 13:38:04,927] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 243 +20: [2023-05-25 13:38:04,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt... +20: [2023-05-25 13:38:04,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt... +25: [2023-05-25 13:38:04,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt. +25: [2023-05-25 13:38:04,935] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 205 + 9: [2023-05-25 13:38:04,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:04,938] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 76 +17: [2023-05-25 13:38:04,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:04,943] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 136 +23: [2023-05-25 13:38:04,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt... +23: [2023-05-25 13:38:04,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt... +30: [2023-05-25 13:38:04,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt. +30: [2023-05-25 13:38:04,944] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 242 + 8: [2023-05-25 13:38:04,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:04,948] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 68 +25: [2023-05-25 13:38:04,948] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 205 +17: [2023-05-25 13:38:04,954] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 136 +30: [2023-05-25 13:38:04,957] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 242 +27: [2023-05-25 13:38:04,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt. +27: [2023-05-25 13:38:04,958] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 218 + 8: [2023-05-25 13:38:04,962] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 68 + 9: [2023-05-25 13:38:04,952] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 76 +27: [2023-05-25 13:38:04,972] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 218 + 4: [2023-05-25 13:38:04,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt... + 4: [2023-05-25 13:38:04,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt... +16: [2023-05-25 13:38:04,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt... +16: [2023-05-25 13:38:04,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt... +10: [2023-05-25 13:38:04,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt. +10: [2023-05-25 13:38:04,975] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 84 +10: [2023-05-25 13:38:04,988] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 84 + 6: [2023-05-25 13:38:04,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt... + 6: [2023-05-25 13:38:04,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt... +15: [2023-05-25 13:38:05,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt. +15: [2023-05-25 13:38:05,019] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 120 +15: [2023-05-25 13:38:05,031] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 120 +17: [2023-05-25 13:38:05,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt. +17: [2023-05-25 13:38:05,074] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 140 + 9: [2023-05-25 13:38:05,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt. + 9: [2023-05-25 13:38:05,067] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 72 + 9: [2023-05-25 13:38:05,080] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 72 +17: [2023-05-25 13:38:05,086] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 140 +11: [2023-05-25 13:38:05,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt. +11: [2023-05-25 13:38:05,119] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 88 +19: [2023-05-25 13:38:05,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:05,130] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 152 +11: [2023-05-25 13:38:05,131] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 88 +19: [2023-05-25 13:38:05,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 152 +22: [2023-05-25 13:38:05,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:05,146] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 176 +10: [2023-05-25 13:38:05,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,155] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 85 +22: [2023-05-25 13:38:05,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 176 + 8: [2023-05-25 13:38:05,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:05,175] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 69 + 8: [2023-05-25 13:38:05,187] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 69 + 9: [2023-05-25 13:38:05,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,167] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 85 + 9: [2023-05-25 13:38:05,206] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 77 +10: [2023-05-25 13:38:05,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt. +10: [2023-05-25 13:38:05,171] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 81 +10: [2023-05-25 13:38:05,183] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 81 +13: [2023-05-25 13:38:05,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:05,235] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 111 +13: [2023-05-25 13:38:05,248] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 111 +12: [2023-05-25 13:38:05,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:05,256] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 99 +15: [2023-05-25 13:38:05,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:05,260] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 123 +12: [2023-05-25 13:38:05,268] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 99 +23: [2023-05-25 13:38:05,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:05,271] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 188 +18: [2023-05-25 13:38:05,273] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:05,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt. +18: [2023-05-25 13:38:05,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 148 +15: [2023-05-25 13:38:05,274] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 123 +18: [2023-05-25 13:38:05,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 144 + 9: [2023-05-25 13:38:05,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,216] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 79 + 9: [2023-05-25 13:38:05,220] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 77 + 9: [2023-05-25 13:38:05,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 79 +14: [2023-05-25 13:38:05,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt. + 8: [2023-05-25 13:38:05,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:05,276] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 67 +14: [2023-05-25 13:38:05,276] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 116 +23: [2023-05-25 13:38:05,283] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 188 +18: [2023-05-25 13:38:05,285] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 148 +18: [2023-05-25 13:38:05,286] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 144 +14: [2023-05-25 13:38:05,288] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 116 + 8: [2023-05-25 13:38:05,291] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 67 +11: [2023-05-25 13:38:05,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:05,293] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 91 +14: [2023-05-25 13:38:05,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt. +14: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 117 +21: [2023-05-25 13:38:05,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt. +21: [2023-05-25 13:38:05,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 172 +15: [2023-05-25 13:38:05,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt. +15: [2023-05-25 13:38:05,303] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 127 +22: [2023-05-25 13:38:05,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt. +22: [2023-05-25 13:38:05,304] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 180 +11: [2023-05-25 13:38:05,307] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 91 +13: [2023-05-25 13:38:05,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_07_optim_states.pt. +13: [2023-05-25 13:38:05,308] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 107 +12: [2023-05-25 13:38:05,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt. +12: [2023-05-25 13:38:05,311] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 103 +21: [2023-05-25 13:38:05,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 172 +14: [2023-05-25 13:38:05,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 117 +11: [2023-05-25 13:38:05,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:05,313] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 93 +15: [2023-05-25 13:38:05,315] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 127 +22: [2023-05-25 13:38:05,318] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 180 +19: [2023-05-25 13:38:05,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt. +19: [2023-05-25 13:38:05,319] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 156 +12: [2023-05-25 13:38:05,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:05,321] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 97 +13: [2023-05-25 13:38:05,321] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 107 +14: [2023-05-25 13:38:05,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,325] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 103 +11: [2023-05-25 13:38:05,327] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 93 +14: [2023-05-25 13:38:05,320] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 118 +10: [2023-05-25 13:38:05,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:05,330] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 83 +19: [2023-05-25 13:38:05,332] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 156 +14: [2023-05-25 13:38:05,332] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 118 +12: [2023-05-25 13:38:05,333] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 97 + 9: [2023-05-25 13:38:05,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt. + 9: [2023-05-25 13:38:05,335] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 73 + 9: [2023-05-25 13:38:05,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:05,335] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 78 +11: [2023-05-25 13:38:05,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt. +11: [2023-05-25 13:38:05,341] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 89 +10: [2023-05-25 13:38:05,342] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 83 +14: [2023-05-25 13:38:05,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:05,344] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 114 +11: [2023-05-25 13:38:05,354] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 89 +13: [2023-05-25 13:38:05,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:05,356] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 106 +12: [2023-05-25 13:38:05,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt. +12: [2023-05-25 13:38:05,366] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 101 +13: [2023-05-25 13:38:05,370] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 106 + 8: [2023-05-25 13:38:05,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt. + 8: [2023-05-25 13:38:05,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 65 +15: [2023-05-25 13:38:05,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:05,378] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 125 +12: [2023-05-25 13:38:05,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 101 +15: [2023-05-25 13:38:05,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt. +15: [2023-05-25 13:38:05,382] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 121 + 8: [2023-05-25 13:38:05,390] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 65 +15: [2023-05-25 13:38:05,391] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 125 +15: [2023-05-25 13:38:05,396] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 121 +13: [2023-05-25 13:38:05,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:05,399] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 105 + 9: [2023-05-25 13:38:05,352] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 73 +14: [2023-05-25 13:38:05,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,352] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 78 +10: [2023-05-25 13:38:05,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt. +14: [2023-05-25 13:38:05,352] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 115 +14: [2023-05-25 13:38:05,357] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 114 +14: [2023-05-25 13:38:05,366] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 115 +10: [2023-05-25 13:38:05,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 86 +14: [2023-05-25 13:38:05,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt. +14: [2023-05-25 13:38:05,366] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 119 +14: [2023-05-25 13:38:05,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 119 +13: [2023-05-25 13:38:05,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 105 +11: [2023-05-25 13:38:05,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt. +11: [2023-05-25 13:38:05,413] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 95 + 8: [2023-05-25 13:38:05,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt. + 8: [2023-05-25 13:38:05,419] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 71 + 4: [2023-05-25 13:38:05,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:05,420] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 36 +11: [2023-05-25 13:38:05,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_06_optim_states.pt. +11: [2023-05-25 13:38:05,427] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 90 +11: [2023-05-25 13:38:05,429] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 95 +16: [2023-05-25 13:38:05,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:05,429] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 132 + 4: [2023-05-25 13:38:05,434] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 36 + 8: [2023-05-25 13:38:05,434] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 71 +10: [2023-05-25 13:38:05,409] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 86 +11: [2023-05-25 13:38:05,442] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 90 +16: [2023-05-25 13:38:05,444] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 132 +12: [2023-05-25 13:38:05,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,447] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 98 + 9: [2023-05-25 13:38:05,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt. + 9: [2023-05-25 13:38:05,451] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 74 + 3: [2023-05-25 13:38:05,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:05,458] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 24 +10: [2023-05-25 13:38:05,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_07_optim_states.pt. +10: [2023-05-25 13:38:05,459] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 87 +12: [2023-05-25 13:38:05,459] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 98 + 9: [2023-05-25 13:38:05,466] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 74 +18: [2023-05-25 13:38:05,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:05,472] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 149 + 3: [2023-05-25 13:38:05,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 24 + 1: [2023-05-25 13:38:05,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:05,475] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 12 +10: [2023-05-25 13:38:05,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 87 +16: [2023-05-25 13:38:05,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:05,483] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 129 +18: [2023-05-25 13:38:05,485] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 149 + 9: [2023-05-25 13:38:05,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt. + 9: [2023-05-25 13:38:05,487] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 75 + 1: [2023-05-25 13:38:05,491] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 12 +16: [2023-05-25 13:38:05,496] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 129 +20: [2023-05-25 13:38:05,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:05,499] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 161 + 8: [2023-05-25 13:38:05,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:05,502] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 66 + 9: [2023-05-25 13:38:05,500] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 75 +21: [2023-05-25 13:38:05,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:05,501] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 173 +12: [2023-05-25 13:38:05,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt. +12: [2023-05-25 13:38:05,504] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 102 +16: [2023-05-25 13:38:05,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt. +16: [2023-05-25 13:38:05,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 133 +20: [2023-05-25 13:38:05,513] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 161 +12: [2023-05-25 13:38:05,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 102 + 8: [2023-05-25 13:38:05,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 66 +13: [2023-05-25 13:38:05,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt. +13: [2023-05-25 13:38:05,518] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 109 +16: [2023-05-25 13:38:05,520] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 133 +20: [2023-05-25 13:38:05,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:05,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 160 +16: [2023-05-25 13:38:05,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt. +16: [2023-05-25 13:38:05,526] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 128 +13: [2023-05-25 13:38:05,532] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 109 +20: [2023-05-25 13:38:05,534] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 160 +13: [2023-05-25 13:38:05,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt. +13: [2023-05-25 13:38:05,536] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 110 +14: [2023-05-25 13:38:05,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt. +21: [2023-05-25 13:38:05,515] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 173 +21: [2023-05-25 13:38:05,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt. +14: [2023-05-25 13:38:05,528] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 113 +21: [2023-05-25 13:38:05,518] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 168 +21: [2023-05-25 13:38:05,530] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 168 + 0: [2023-05-25 13:38:05,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:05,538] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 4 +16: [2023-05-25 13:38:05,539] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 128 +15: [2023-05-25 13:38:05,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:05,542] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 126 +14: [2023-05-25 13:38:05,542] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 113 +13: [2023-05-25 13:38:05,551] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 110 + 0: [2023-05-25 13:38:05,552] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 4 +19: [2023-05-25 13:38:05,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:05,553] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 154 +15: [2023-05-25 13:38:05,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt. +15: [2023-05-25 13:38:05,555] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 122 +15: [2023-05-25 13:38:05,555] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 126 +19: [2023-05-25 13:38:05,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:05,559] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 153 +19: [2023-05-25 13:38:05,566] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 154 + 2: [2023-05-25 13:38:05,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. + 2: [2023-05-25 13:38:05,568] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 16 +15: [2023-05-25 13:38:05,568] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 122 +20: [2023-05-25 13:38:05,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt. +20: [2023-05-25 13:38:05,571] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 164 +19: [2023-05-25 13:38:05,572] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 153 +20: [2023-05-25 13:38:05,582] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 164 + 2: [2023-05-25 13:38:05,583] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 16 + 8: [2023-05-25 13:38:05,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt. + 8: [2023-05-25 13:38:05,583] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 70 +20: [2023-05-25 13:38:05,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,589] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 162 + 2: [2023-05-25 13:38:05,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +11: [2023-05-25 13:38:05,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt. + 2: [2023-05-25 13:38:05,595] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 20 +11: [2023-05-25 13:38:05,595] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 94 + 8: [2023-05-25 13:38:05,600] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 70 +20: [2023-05-25 13:38:05,602] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 162 +19: [2023-05-25 13:38:05,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt. +19: [2023-05-25 13:38:05,602] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 157 +10: [2023-05-25 13:38:05,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt. +10: [2023-05-25 13:38:05,607] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 82 + 2: [2023-05-25 13:38:05,608] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 20 +11: [2023-05-25 13:38:05,611] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 94 + 5: [2023-05-25 13:38:05,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. + 5: [2023-05-25 13:38:05,614] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 40 +19: [2023-05-25 13:38:05,615] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 157 + 6: [2023-05-25 13:38:05,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:05,619] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 49 +18: [2023-05-25 13:38:05,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt. +18: [2023-05-25 13:38:05,621] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 145 +10: [2023-05-25 13:38:05,623] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 82 + 1: [2023-05-25 13:38:05,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. + 1: [2023-05-25 13:38:05,629] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 8 + 5: [2023-05-25 13:38:05,629] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 40 +22: [2023-05-25 13:38:05,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:05,631] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 183 +17: [2023-05-25 13:38:05,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,632] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 143 + 5: [2023-05-25 13:38:05,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:05,633] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 41 + 6: [2023-05-25 13:38:05,634] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 49 +18: [2023-05-25 13:38:05,634] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 145 + 4: [2023-05-25 13:38:05,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. + 4: [2023-05-25 13:38:05,639] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 32 +23: [2023-05-25 13:38:05,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:05,641] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 186 +21: [2023-05-25 13:38:05,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:05,631] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 175 +21: [2023-05-25 13:38:05,643] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 175 +22: [2023-05-25 13:38:05,644] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 183 +17: [2023-05-25 13:38:05,644] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 143 + 1: [2023-05-25 13:38:05,646] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 8 + 6: [2023-05-25 13:38:05,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:05,648] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 52 + 5: [2023-05-25 13:38:05,650] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 41 +23: [2023-05-25 13:38:05,653] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 186 + 4: [2023-05-25 13:38:05,654] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 32 +20: [2023-05-25 13:38:05,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,656] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 166 + 6: [2023-05-25 13:38:05,662] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 52 +23: [2023-05-25 13:38:05,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:05,667] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 191 +17: [2023-05-25 13:38:05,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,670] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 166 +17: [2023-05-25 13:38:05,670] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 142 +17: [2023-05-25 13:38:05,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,671] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 139 +16: [2023-05-25 13:38:05,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:05,676] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 131 +23: [2023-05-25 13:38:05,678] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 191 + 4: [2023-05-25 13:38:05,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,681] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 33 +17: [2023-05-25 13:38:05,685] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 139 +23: [2023-05-25 13:38:05,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt. +23: [2023-05-25 13:38:05,686] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 184 +17: [2023-05-25 13:38:05,686] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 142 +16: [2023-05-25 13:38:05,689] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 131 +22: [2023-05-25 13:38:05,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:05,690] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 181 + 4: [2023-05-25 13:38:05,697] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 33 +23: [2023-05-25 13:38:05,698] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 184 +23: [2023-05-25 13:38:05,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt. +23: [2023-05-25 13:38:05,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 190 +22: [2023-05-25 13:38:05,703] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 181 +22: [2023-05-25 13:38:05,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:05,703] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 182 +23: [2023-05-25 13:38:05,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt. +23: [2023-05-25 13:38:05,703] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 187 +16: [2023-05-25 13:38:05,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:05,704] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 130 +21: [2023-05-25 13:38:05,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt. +21: [2023-05-25 13:38:05,681] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 169 +21: [2023-05-25 13:38:05,695] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 169 +23: [2023-05-25 13:38:05,713] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 190 + 4: [2023-05-25 13:38:05,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:05,714] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 35 +23: [2023-05-25 13:38:05,716] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 187 +22: [2023-05-25 13:38:05,717] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 182 +16: [2023-05-25 13:38:05,718] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 130 + 5: [2023-05-25 13:38:05,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,718] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 42 + 0: [2023-05-25 13:38:05,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. + 0: [2023-05-25 13:38:05,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 0 +16: [2023-05-25 13:38:05,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_10_optim_states.pt. +16: [2023-05-25 13:38:05,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 134 +18: [2023-05-25 13:38:05,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:05,720] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 147 + 3: [2023-05-25 13:38:05,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,721] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 29 +17: [2023-05-25 13:38:05,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:05,724] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 141 + 5: [2023-05-25 13:38:05,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt. + 5: [2023-05-25 13:38:05,724] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 46 +17: [2023-05-25 13:38:05,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt. +20: [2023-05-25 13:38:05,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_11_optim_states.pt. +17: [2023-05-25 13:38:05,729] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 138 +20: [2023-05-25 13:38:05,729] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 167 + 4: [2023-05-25 13:38:05,730] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 35 +18: [2023-05-25 13:38:05,731] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 147 +16: [2023-05-25 13:38:05,733] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 134 + 0: [2023-05-25 13:38:05,733] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 0 + 7: [2023-05-25 13:38:05,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:05,734] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 57 + 5: [2023-05-25 13:38:05,735] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 42 + 3: [2023-05-25 13:38:05,735] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 29 + 6: [2023-05-25 13:38:05,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. + 6: [2023-05-25 13:38:05,736] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 48 +17: [2023-05-25 13:38:05,736] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 141 +18: [2023-05-25 13:38:05,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:05,736] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 146 +21: [2023-05-25 13:38:05,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:05,731] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 174 +21: [2023-05-25 13:38:05,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt. +21: [2023-05-25 13:38:05,734] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 170 + 5: [2023-05-25 13:38:05,739] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 46 +17: [2023-05-25 13:38:05,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt. +17: [2023-05-25 13:38:05,739] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 137 +17: [2023-05-25 13:38:05,741] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 138 +20: [2023-05-25 13:38:05,743] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 167 +22: [2023-05-25 13:38:05,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_09_optim_states.pt. +22: [2023-05-25 13:38:05,745] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 177 +21: [2023-05-25 13:38:05,745] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 174 +23: [2023-05-25 13:38:05,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:05,746] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 185 + 0: could not find arguments in the checkpoint ... + 0: checkpoint version 3.0 +21: [2023-05-25 13:38:05,748] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 170 +18: [2023-05-25 13:38:05,748] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 146 + 6: [2023-05-25 13:38:05,751] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 48 +23: [2023-05-25 13:38:05,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt. +23: [2023-05-25 13:38:05,751] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 189 + 7: [2023-05-25 13:38:05,752] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 57 +17: [2023-05-25 13:38:05,753] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 137 + 6: [2023-05-25 13:38:05,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:05,754] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 50 +22: [2023-05-25 13:38:05,759] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 177 +23: [2023-05-25 13:38:05,760] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 185 + 5: [2023-05-25 13:38:05,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt. + 5: [2023-05-25 13:38:05,761] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 45 +23: [2023-05-25 13:38:05,763] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 189 + 3: [2023-05-25 13:38:05,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. + 3: [2023-05-25 13:38:05,764] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 28 + 6: [2023-05-25 13:38:05,770] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 50 + 5: [2023-05-25 13:38:05,776] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 45 + 7: [2023-05-25 13:38:05,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt. + 7: [2023-05-25 13:38:05,776] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 61 + 3: [2023-05-25 13:38:05,778] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 28 +18: [2023-05-25 13:38:05,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt. +18: [2023-05-25 13:38:05,780] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 151 +18: [2023-05-25 13:38:05,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt. +18: [2023-05-25 13:38:05,785] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 150 + 7: [2023-05-25 13:38:05,791] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 61 + 5: [2023-05-25 13:38:05,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. + 5: [2023-05-25 13:38:05,793] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 44 +18: [2023-05-25 13:38:05,794] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 151 +20: [2023-05-25 13:38:05,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt. +20: [2023-05-25 13:38:05,795] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 163 + 6: [2023-05-25 13:38:05,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,798] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 51 +18: [2023-05-25 13:38:05,800] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 150 +19: [2023-05-25 13:38:05,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_10_optim_states.pt. +19: [2023-05-25 13:38:05,803] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 158 + 2: [2023-05-25 13:38:05,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,807] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 21 +19: [2023-05-25 13:38:05,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:05,808] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 159 + 5: [2023-05-25 13:38:05,808] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 44 +20: [2023-05-25 13:38:05,809] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 163 + 2: [2023-05-25 13:38:05,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,811] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 19 + 6: [2023-05-25 13:38:05,815] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 51 +19: [2023-05-25 13:38:05,817] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 158 +19: [2023-05-25 13:38:05,821] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 159 + 2: [2023-05-25 13:38:05,822] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 21 +20: [2023-05-25 13:38:05,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt. +20: [2023-05-25 13:38:05,826] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 165 + 2: [2023-05-25 13:38:05,827] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 19 +22: [2023-05-25 13:38:05,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt. +22: [2023-05-25 13:38:05,835] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 179 + 7: [2023-05-25 13:38:05,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:05,836] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 56 + 1: [2023-05-25 13:38:05,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt. +20: [2023-05-25 13:38:05,840] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 165 + 1: [2023-05-25 13:38:05,840] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 9 + 7: [2023-05-25 13:38:05,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,845] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 59 +22: [2023-05-25 13:38:05,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt. +22: [2023-05-25 13:38:05,846] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 178 +22: [2023-05-25 13:38:05,849] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 179 +21: [2023-05-25 13:38:05,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt. +21: [2023-05-25 13:38:05,828] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 171 +21: [2023-05-25 13:38:05,843] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 171 + 7: [2023-05-25 13:38:05,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt. + 7: [2023-05-25 13:38:05,852] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 63 + 3: [2023-05-25 13:38:05,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,852] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 30 + 7: [2023-05-25 13:38:05,854] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 56 + 1: [2023-05-25 13:38:05,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 9 +22: [2023-05-25 13:38:05,857] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 178 + 7: [2023-05-25 13:38:05,861] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 59 + 2: [2023-05-25 13:38:05,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt. + 2: [2023-05-25 13:38:05,862] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 17 + 0: [2023-05-25 13:38:05,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,866] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 5 + 3: [2023-05-25 13:38:05,867] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 30 + 7: [2023-05-25 13:38:05,871] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 63 + 3: [2023-05-25 13:38:05,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt. + 3: [2023-05-25 13:38:05,873] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 25 + 2: [2023-05-25 13:38:05,876] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 17 + 0: [2023-05-25 13:38:05,880] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 5 +19: [2023-05-25 13:38:05,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt. +19: [2023-05-25 13:38:05,883] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 155 + 3: [2023-05-25 13:38:05,887] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 25 + 1: [2023-05-25 13:38:05,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:05,894] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 14 + 1: [2023-05-25 13:38:05,895] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_01_optim_states.pt. + 1: [2023-05-25 13:38:05,895] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 13 + 7: [2023-05-25 13:38:05,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. + 7: [2023-05-25 13:38:05,896] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 60 +19: [2023-05-25 13:38:05,897] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 155 + 7: [2023-05-25 13:38:05,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt. + 7: [2023-05-25 13:38:05,900] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 58 + 0: [2023-05-25 13:38:05,905] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:05,905] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 7 + 1: [2023-05-25 13:38:05,909] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 14 + 7: [2023-05-25 13:38:05,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 60 + 1: [2023-05-25 13:38:05,913] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 13 + 7: [2023-05-25 13:38:05,916] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 58 + 0: [2023-05-25 13:38:05,919] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 7 + 6: [2023-05-25 13:38:05,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt. + 6: [2023-05-25 13:38:05,925] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 55 + 0: [2023-05-25 13:38:05,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:05,931] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 6 + 2: [2023-05-25 13:38:05,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt. + 2: [2023-05-25 13:38:05,938] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 23 + 6: [2023-05-25 13:38:05,942] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 55 + 0: [2023-05-25 13:38:05,946] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 6 + 2: [2023-05-25 13:38:05,952] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 23 + 0: [2023-05-25 13:38:05,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt. + 0: [2023-05-25 13:38:05,952] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 1 + 2: [2023-05-25 13:38:05,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:05,957] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 22 + 3: [2023-05-25 13:38:05,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_02_optim_states.pt. + 3: [2023-05-25 13:38:05,963] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 26 + 0: [2023-05-25 13:38:05,968] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 1 + 2: [2023-05-25 13:38:05,973] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 22 + 3: [2023-05-25 13:38:05,979] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 26 + 4: [2023-05-25 13:38:05,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt. + 4: [2023-05-25 13:38:05,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_03_optim_states.pt. + 4: [2023-05-25 13:38:05,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 37 + 4: [2023-05-25 13:38:05,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 39 + 4: [2023-05-25 13:38:06,000] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 39 + 4: [2023-05-25 13:38:06,000] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 37 + 3: [2023-05-25 13:38:06,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:06,016] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 31 + 2: [2023-05-25 13:38:06,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt. + 2: [2023-05-25 13:38:06,022] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 18 +16: [2023-05-25 13:38:06,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_1_mp_rank_11_optim_states.pt. +16: [2023-05-25 13:38:06,025] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 135 + 3: [2023-05-25 13:38:06,031] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 31 + 6: [2023-05-25 13:38:06,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt. + 6: [2023-05-25 13:38:06,034] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 53 + 0: [2023-05-25 13:38:06,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt. + 0: [2023-05-25 13:38:06,035] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 3 +16: [2023-05-25 13:38:06,036] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 135 + 4: [2023-05-25 13:38:06,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:06,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 38 + 5: [2023-05-25 13:38:06,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:06,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 43 + 2: [2023-05-25 13:38:06,039] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 18 + 1: [2023-05-25 13:38:06,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:06,042] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 15 + 4: [2023-05-25 13:38:06,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt. + 4: [2023-05-25 13:38:06,047] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 34 + 6: [2023-05-25 13:38:06,050] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 53 + 0: [2023-05-25 13:38:06,051] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 3 + 4: [2023-05-25 13:38:06,054] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 38 + 5: [2023-05-25 13:38:06,055] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 43 + 5: [2023-05-25 13:38:06,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_11_mp_rank_03_optim_states.pt. + 5: [2023-05-25 13:38:06,056] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 47 + 0: [2023-05-25 13:38:06,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt. + 0: [2023-05-25 13:38:06,057] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 2 + 1: [2023-05-25 13:38:06,059] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 15 + 6: [2023-05-25 13:38:06,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt. + 6: [2023-05-25 13:38:06,061] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 54 + 4: [2023-05-25 13:38:06,062] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 34 + 1: [2023-05-25 13:38:06,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt. + 1: [2023-05-25 13:38:06,063] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 10 + 1: [2023-05-25 13:38:06,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt. + 1: [2023-05-25 13:38:06,068] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 11 + 5: [2023-05-25 13:38:06,073] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 47 + 0: [2023-05-25 13:38:06,075] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 2 + 6: [2023-05-25 13:38:06,077] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 54 + 1: [2023-05-25 13:38:06,079] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 10 + 1: [2023-05-25 13:38:06,085] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 11 + 3: [2023-05-25 13:38:06,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt. + 3: [2023-05-25 13:38:06,095] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 27 + 3: [2023-05-25 13:38:06,109] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 27 + 7: [2023-05-25 13:38:06,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b/global_step84877/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt. + 7: [2023-05-25 13:38:06,212] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 16 ZeRO state_dicts for rank 62 + 7: [2023-05-25 13:38:06,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 16 zero partition checkpoints for rank 62 + 0: successfully loaded checkpoint from lm1-8b7-178b-c4-repetitions/8b7178b13b at iteration 0 +31: time (ms) | load-checkpoint: 6029.05 + 0: estimated model parameters: 9.828646912 + 0: estimated model parameters without embeddings: 8.863956992 + 0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-05-25 13:38:06 + 0: > building train, validation, and test datasets ... + 0: > datasets target sizes (minimum size): + 0: train: 1 + 0: validation: 51200 + 0: test: 51200 + 0: > building train, validation, and test datasets for GPT ... + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.015118 seconds + 0: number of documents: 835726 + 0: > dataset split: + 0: train: + 0: document indices in [0, 835726) total of 835726 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_400M_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.011 seconds + 0: total number of samples: 195101 + 0: total number of epochs: 1 + 0: > building dataset index ... + 0: reading sizes... + 0: reading pointers... + 0: reading document index... + 0: creating numpy buffer of mmap... + 0: creating memory view of numpy buffer... + 0: > finished creating indexed dataset in 0.008158 seconds + 0: number of documents: 364608 + 0: > dataset split: + 0: validation: + 0: document indices in [0, 364608) total of 364608 documents + 0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_doc_idx.npy + 0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_sample_idx.npy + 0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_51200ns_2048sl_1234s_shuffle_idx.npy + 0: loaded indexed file in 0.011 seconds + 0: total number of samples: 84978 + 0: total number of epochs: 1 + 0: > finished creating GPT datasets ... + 0: [after dataloaders are built] datetime: 2023-05-25 13:38:13 + 0: done with setup ... + 0: training ... +31: time (ms) | model-and-optimizer-setup: 14997.17 | train/valid/test-data-iterators-setup: 3085.96 + 0: [after training is done] datetime: 2023-05-25 13:38:13 +31: ----------------------------------------------------------------------------------------------------------------- +31: validation loss at the end of training for val data | lm loss value: 2.451606E+00 | lm loss PPL: 1.160698E+01 | +31: ----------------------------------------------------------------------------------------------------------------- +END 3583606: Thu 25 May 2023 01:41:36 PM EEST