# bloom-finnish-176B-megatron / .gitattributes
# Muennighoff's picture
# Add
# 6f130b0
# Generic Git LFS rules.
# Every rule below routes matching paths through the LFS filter
# (filter/diff/merge=lfs) and unsets `text` so Git performs no end-of-line
# conversion on them. All rules carry the same attribute set, so their
# relative order has no effect on the result.

# Archive and compressed formats
*.7z filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

# Extensions commonly used for model weights, checkpoints and exported graphs
*.bin filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
# Everything at any depth below a top-level saved_model/ directory
saved_model/**/* filter=lfs diff=lfs merge=lfs -text

# Extensions commonly used for serialized arrays, tables and objects
*.arrow filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text

# Miscellaneous: names containing ".lfs.", WebAssembly binaries, and any
# file whose name contains "tfevents"
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
# Checkpoint shards stored directly under global_step105545/.
# This single directory-scoped glob replaces a long list of per-file rules
# (bf16_zero_pp_rank_*_optim_states.pt, layer_*-model_*-model_states.pt and
# mp_rank_*_model_states.pt), each of which named one `.pt` file in this
# directory with exactly these attributes. Because the pattern contains a
# slash it is anchored to the directory holding this .gitattributes file, and
# `*` does not cross directory separators, so only files directly inside
# global_step105545/ are matched. The effective attributes of every
# previously listed path are unchanged.
global_step105545/*.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_68_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_30_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_07_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_27_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_29-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_29_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_15-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_43-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_61_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_78_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_18_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_46-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_41_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_92_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_53_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_03-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_62_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_35_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_32_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_64_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_03-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_77_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_46_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_44_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_49-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_52-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_06_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_95_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_75_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_29-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_13_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_60_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_50_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_88_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_40_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_65_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_09_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_49-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_52-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_58_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_67_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_08_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_51_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_71_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_43_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_70_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_15-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_72_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_19_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_85_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_14_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_86_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_89_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_93_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_25_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_03-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_46-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_20_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_46-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_11_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_61-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_49_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_23_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_36-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_46-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_52-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_03-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_12_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_17-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_38_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_20-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_17_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_31_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_45_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_42_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_48_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_16-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_01-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_43-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_29-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_43-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_10_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_62-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_22_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_49-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_31-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_83_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_40-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_49-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_55-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_28_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_60-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_10-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_14-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_90_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_43-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_33_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_05-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_43-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_76_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_39-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_80_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_62-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_08-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_18-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_15-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_11_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_26_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_18_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_62-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_50_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_63_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_71-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_02_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_80_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_36_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_66_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_41-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_52-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_65-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_55_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_26_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_05_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_52-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_69_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_42-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_61_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_91_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_52_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_86_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_19-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_15_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_06-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_46-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_45_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_71_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_67-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_08_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_03_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_00_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_62-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_63-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_26-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_68-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_64_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_56_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_72-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_22_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_30-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_69_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_59-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_46_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_33-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_15-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_49_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_62_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_62-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_01_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_54-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_34-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_22-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_07_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_77_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_68_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_58-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_32_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_03-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_02_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_55_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_65_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_37-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_38-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_50-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_01_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_25_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_16_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_45-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_29_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_66-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_60_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_72_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_25-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_64-model_05-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_58_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_06_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_13_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_17_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_53-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_82_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_12-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_23-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_42_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_04-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_07-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_44_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_89_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_95_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_27_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_27-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_24_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_34_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_04_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_09-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_24-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_69-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_39_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_70_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_9_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_79_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_33_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_78_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_20_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_47_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_35-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_14_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_56_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_57_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_43_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_87_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_21_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_79_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_76_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_03_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_92_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_34_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_37_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_23_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_54_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_51-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_84_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_41_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_12_mp_rank_74_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_40_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_19_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_15_mp_rank_24_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_21-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_28-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_12_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_73_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_85_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_30_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_15_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_74-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_90_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_09_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_7_mp_rank_67_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_11-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_10_mp_rank_63_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_13_mp_rank_59_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_13-model_07-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_47_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_38_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_84_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_28_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_11_mp_rank_75_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_51_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_31_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_56-model_03-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_54_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_21_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_53_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_47-model_06-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_48-model_00-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_6_mp_rank_35_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_8_mp_rank_66_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_32-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_57-model_04-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_94_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_14_mp_rank_81_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_2_mp_rank_88_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_5_mp_rank_10_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_44-model_01-model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/mp_rank_73_model_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_1_mp_rank_93_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_3_mp_rank_48_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/bf16_zero_pp_rank_4_mp_rank_52_optim_states.pt filter=lfs diff=lfs merge=lfs -text
global_step105545/layer_70-model_02-model_states.pt filter=lfs diff=lfs merge=lfs -text