{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 3000, "global_step": 1080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00019814814814814814, "loss": 2.1983, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.0001962962962962963, "loss": 2.1208, "step": 20 }, { "epoch": 0.11, "learning_rate": 0.00019444444444444446, "loss": 1.8985, "step": 30 }, { "epoch": 0.15, "learning_rate": 0.0001925925925925926, "loss": 1.726, "step": 40 }, { "epoch": 0.19, "learning_rate": 0.00019074074074074075, "loss": 1.6799, "step": 50 }, { "epoch": 0.22, "learning_rate": 0.00018888888888888888, "loss": 1.5807, "step": 60 }, { "epoch": 0.26, "learning_rate": 0.00018703703703703704, "loss": 1.5165, "step": 70 }, { "epoch": 0.3, "learning_rate": 0.0001851851851851852, "loss": 1.4893, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00018333333333333334, "loss": 1.24, "step": 90 }, { "epoch": 0.37, "learning_rate": 0.0001814814814814815, "loss": 1.2977, "step": 100 }, { "epoch": 0.41, "learning_rate": 0.00017962962962962963, "loss": 1.2492, "step": 110 }, { "epoch": 0.44, "learning_rate": 0.00017777777777777779, "loss": 1.1566, "step": 120 }, { "epoch": 0.48, "learning_rate": 0.00017592592592592595, "loss": 1.3443, "step": 130 }, { "epoch": 0.52, "learning_rate": 0.00017407407407407408, "loss": 1.2112, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.00017222222222222224, "loss": 1.0302, "step": 150 }, { "epoch": 0.59, "learning_rate": 0.00017037037037037037, "loss": 1.1856, "step": 160 }, { "epoch": 0.63, "learning_rate": 0.00016851851851851853, "loss": 1.1745, "step": 170 }, { "epoch": 0.67, "learning_rate": 0.0001666666666666667, "loss": 1.0659, "step": 180 }, { "epoch": 0.7, "learning_rate": 0.00016481481481481482, "loss": 1.0134, "step": 190 }, { "epoch": 0.74, "learning_rate": 0.00016296296296296295, "loss": 0.9692, "step": 200 }, { "epoch": 0.78, "learning_rate": 0.0001611111111111111, "loss": 0.8877, "step": 210 }, { "epoch": 0.81, "learning_rate": 0.00015925925925925927, "loss": 0.8419, "step": 220 }, { "epoch": 0.85, "learning_rate": 0.00015740740740740743, "loss": 0.9147, "step": 230 }, { "epoch": 0.89, "learning_rate": 0.00015555555555555556, "loss": 1.0111, "step": 240 }, { "epoch": 0.93, "learning_rate": 0.0001537037037037037, "loss": 0.9457, "step": 250 }, { "epoch": 0.96, "learning_rate": 0.00015185185185185185, "loss": 0.9867, "step": 260 }, { "epoch": 1.0, "learning_rate": 0.00015000000000000001, "loss": 1.1366, "step": 270 }, { "epoch": 1.04, "learning_rate": 0.00014814814814814815, "loss": 0.7648, "step": 280 }, { "epoch": 1.07, "learning_rate": 0.0001462962962962963, "loss": 0.8693, "step": 290 }, { "epoch": 1.11, "learning_rate": 0.00014444444444444444, "loss": 0.8179, "step": 300 }, { "epoch": 1.15, "learning_rate": 0.0001425925925925926, "loss": 0.8077, "step": 310 }, { "epoch": 1.19, "learning_rate": 0.00014074074074074076, "loss": 0.941, "step": 320 }, { "epoch": 1.22, "learning_rate": 0.0001388888888888889, "loss": 0.8906, "step": 330 }, { "epoch": 1.26, "learning_rate": 0.00013703703703703705, "loss": 0.8433, "step": 340 }, { "epoch": 1.3, "learning_rate": 0.00013518518518518518, "loss": 0.7654, "step": 350 }, { "epoch": 1.33, "learning_rate": 0.00013333333333333334, "loss": 0.7217, "step": 360 }, { "epoch": 1.37, "learning_rate": 0.0001314814814814815, "loss": 0.8744, "step": 370 }, { "epoch": 1.41, "learning_rate": 0.00012962962962962963, "loss": 0.6337, "step": 380 }, { "epoch": 1.44, "learning_rate": 0.00012777777777777776, "loss": 0.6369, "step": 390 }, { "epoch": 1.48, "learning_rate": 0.00012592592592592592, "loss": 0.5268, "step": 400 }, { "epoch": 1.52, "learning_rate": 0.00012407407407407408, "loss": 0.8015, "step": 410 }, { "epoch": 1.56, "learning_rate": 0.00012222222222222224, "loss": 0.5013, "step": 420 }, { "epoch": 1.59, "learning_rate": 0.00012037037037037037, "loss": 0.6063, "step": 430 }, { "epoch": 1.63, "learning_rate": 0.00011851851851851852, "loss": 0.7767, "step": 440 }, { "epoch": 1.67, "learning_rate": 0.00011666666666666668, "loss": 0.5174, "step": 450 }, { "epoch": 1.7, "learning_rate": 0.00011481481481481482, "loss": 0.6391, "step": 460 }, { "epoch": 1.74, "learning_rate": 0.00011296296296296296, "loss": 0.4966, "step": 470 }, { "epoch": 1.78, "learning_rate": 0.00011111111111111112, "loss": 0.5991, "step": 480 }, { "epoch": 1.81, "learning_rate": 0.00010925925925925926, "loss": 0.5499, "step": 490 }, { "epoch": 1.85, "learning_rate": 0.00010740740740740742, "loss": 0.5488, "step": 500 }, { "epoch": 1.89, "learning_rate": 0.00010555555555555557, "loss": 0.5834, "step": 510 }, { "epoch": 1.93, "learning_rate": 0.0001037037037037037, "loss": 0.6238, "step": 520 }, { "epoch": 1.96, "learning_rate": 0.00010185185185185186, "loss": 0.6365, "step": 530 }, { "epoch": 2.0, "learning_rate": 0.0001, "loss": 0.5337, "step": 540 }, { "epoch": 2.04, "learning_rate": 9.814814814814815e-05, "loss": 0.4326, "step": 550 }, { "epoch": 2.07, "learning_rate": 9.62962962962963e-05, "loss": 0.4197, "step": 560 }, { "epoch": 2.11, "learning_rate": 9.444444444444444e-05, "loss": 0.3268, "step": 570 }, { "epoch": 2.15, "learning_rate": 9.25925925925926e-05, "loss": 0.3066, "step": 580 }, { "epoch": 2.19, "learning_rate": 9.074074074074075e-05, "loss": 0.4737, "step": 590 }, { "epoch": 2.22, "learning_rate": 8.888888888888889e-05, "loss": 0.3185, "step": 600 }, { "epoch": 2.26, "learning_rate": 8.703703703703704e-05, "loss": 0.4233, "step": 610 }, { "epoch": 2.3, "learning_rate": 8.518518518518518e-05, "loss": 0.3377, "step": 620 }, { "epoch": 2.33, "learning_rate": 8.333333333333334e-05, "loss": 0.3957, "step": 630 }, { "epoch": 2.37, "learning_rate": 8.148148148148148e-05, "loss": 0.3915, "step": 640 }, { "epoch": 2.41, "learning_rate": 7.962962962962964e-05, "loss": 0.3025, "step": 650 }, { "epoch": 2.44, "learning_rate": 7.777777777777778e-05, "loss": 0.2896, "step": 660 }, { "epoch": 2.48, "learning_rate": 7.592592592592593e-05, "loss": 0.2558, "step": 670 }, { "epoch": 2.52, "learning_rate": 7.407407407407407e-05, "loss": 0.3477, "step": 680 }, { "epoch": 2.56, "learning_rate": 7.222222222222222e-05, "loss": 0.2111, "step": 690 }, { "epoch": 2.59, "learning_rate": 7.037037037037038e-05, "loss": 0.2885, "step": 700 }, { "epoch": 2.63, "learning_rate": 6.851851851851852e-05, "loss": 0.2953, "step": 710 }, { "epoch": 2.67, "learning_rate": 6.666666666666667e-05, "loss": 0.2415, "step": 720 }, { "epoch": 2.7, "learning_rate": 6.481481481481482e-05, "loss": 0.3242, "step": 730 }, { "epoch": 2.74, "learning_rate": 6.296296296296296e-05, "loss": 0.2616, "step": 740 }, { "epoch": 2.78, "learning_rate": 6.111111111111112e-05, "loss": 0.2853, "step": 750 }, { "epoch": 2.81, "learning_rate": 5.925925925925926e-05, "loss": 0.2828, "step": 760 }, { "epoch": 2.85, "learning_rate": 5.740740740740741e-05, "loss": 0.2382, "step": 770 }, { "epoch": 2.89, "learning_rate": 5.555555555555556e-05, "loss": 0.3508, "step": 780 }, { "epoch": 2.93, "learning_rate": 5.370370370370371e-05, "loss": 0.2794, "step": 790 }, { "epoch": 2.96, "learning_rate": 5.185185185185185e-05, "loss": 0.3247, "step": 800 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 0.2753, "step": 810 }, { "epoch": 3.04, "learning_rate": 4.814814814814815e-05, "loss": 0.1453, "step": 820 }, { "epoch": 3.07, "learning_rate": 4.62962962962963e-05, "loss": 0.1666, "step": 830 }, { "epoch": 3.11, "learning_rate": 4.4444444444444447e-05, "loss": 0.1369, "step": 840 }, { "epoch": 3.15, "learning_rate": 4.259259259259259e-05, "loss": 0.1086, "step": 850 }, { "epoch": 3.19, "learning_rate": 4.074074074074074e-05, "loss": 0.0967, "step": 860 }, { "epoch": 3.22, "learning_rate": 3.888888888888889e-05, "loss": 0.1327, "step": 870 }, { "epoch": 3.26, "learning_rate": 3.7037037037037037e-05, "loss": 0.0848, "step": 880 }, { "epoch": 3.3, "learning_rate": 3.518518518518519e-05, "loss": 0.1173, "step": 890 }, { "epoch": 3.33, "learning_rate": 3.3333333333333335e-05, "loss": 0.135, "step": 900 }, { "epoch": 3.37, "learning_rate": 3.148148148148148e-05, "loss": 0.1979, "step": 910 }, { "epoch": 3.41, "learning_rate": 2.962962962962963e-05, "loss": 0.1181, "step": 920 }, { "epoch": 3.44, "learning_rate": 2.777777777777778e-05, "loss": 0.0957, "step": 930 }, { "epoch": 3.48, "learning_rate": 2.5925925925925925e-05, "loss": 0.0927, "step": 940 }, { "epoch": 3.52, "learning_rate": 2.4074074074074074e-05, "loss": 0.094, "step": 950 }, { "epoch": 3.56, "learning_rate": 2.2222222222222223e-05, "loss": 0.1197, "step": 960 }, { "epoch": 3.59, "learning_rate": 2.037037037037037e-05, "loss": 0.0927, "step": 970 }, { "epoch": 3.63, "learning_rate": 1.8518518518518518e-05, "loss": 0.1523, "step": 980 }, { "epoch": 3.67, "learning_rate": 1.6666666666666667e-05, "loss": 0.2582, "step": 990 }, { "epoch": 3.7, "learning_rate": 1.4814814814814815e-05, "loss": 0.1101, "step": 1000 }, { "epoch": 3.74, "learning_rate": 1.2962962962962962e-05, "loss": 0.1582, "step": 1010 }, { "epoch": 3.78, "learning_rate": 1.1111111111111112e-05, "loss": 0.0504, "step": 1020 }, { "epoch": 3.81, "learning_rate": 9.259259259259259e-06, "loss": 0.0788, "step": 1030 }, { "epoch": 3.85, "learning_rate": 7.4074074074074075e-06, "loss": 0.0607, "step": 1040 }, { "epoch": 3.89, "learning_rate": 5.555555555555556e-06, "loss": 0.1061, "step": 1050 }, { "epoch": 3.93, "learning_rate": 3.7037037037037037e-06, "loss": 0.0645, "step": 1060 }, { "epoch": 3.96, "learning_rate": 1.8518518518518519e-06, "loss": 0.0971, "step": 1070 }, { "epoch": 4.0, "learning_rate": 0.0, "loss": 0.128, "step": 1080 }, { "epoch": 4.0, "step": 1080, "total_flos": 1.339145591637934e+18, "train_loss": 0.6042599819324634, "train_runtime": 412.4773, "train_samples_per_second": 41.893, "train_steps_per_second": 2.618 } ], "logging_steps": 10, "max_steps": 1080, "num_train_epochs": 4, "save_steps": 3000, "total_flos": 1.339145591637934e+18, "trial_name": null, "trial_params": null }