{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.2108843537415, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1.9912109375000002e-05, "loss": 1.9656, "step": 20 }, { "epoch": 0.31, "learning_rate": 1.98193359375e-05, "loss": 1.8445, "step": 40 }, { "epoch": 0.47, "learning_rate": 1.97216796875e-05, "loss": 1.59, "step": 60 }, { "epoch": 0.62, "learning_rate": 1.9624023437500002e-05, "loss": 1.6275, "step": 80 }, { "epoch": 0.78, "learning_rate": 1.9526367187500002e-05, "loss": 1.473, "step": 100 }, { "epoch": 0.93, "learning_rate": 1.9428710937500003e-05, "loss": 1.3701, "step": 120 }, { "epoch": 1.09, "learning_rate": 1.93310546875e-05, "loss": 1.3224, "step": 140 }, { "epoch": 1.24, "learning_rate": 1.92333984375e-05, "loss": 1.1423, "step": 160 }, { "epoch": 1.4, "learning_rate": 1.91357421875e-05, "loss": 1.1652, "step": 180 }, { "epoch": 1.55, "learning_rate": 1.9038085937500002e-05, "loss": 1.1422, "step": 200 }, { "epoch": 1.71, "learning_rate": 1.8940429687500002e-05, "loss": 1.002, "step": 220 }, { "epoch": 1.87, "learning_rate": 1.8842773437500003e-05, "loss": 1.0779, "step": 240 }, { "epoch": 2.02, "learning_rate": 1.87451171875e-05, "loss": 0.9887, "step": 260 }, { "epoch": 2.18, "learning_rate": 1.86474609375e-05, "loss": 0.9543, "step": 280 }, { "epoch": 2.33, "learning_rate": 1.85498046875e-05, "loss": 0.9371, "step": 300 }, { "epoch": 2.49, "learning_rate": 1.8452148437500002e-05, "loss": 0.8701, "step": 320 }, { "epoch": 2.64, "learning_rate": 1.8354492187500003e-05, "loss": 0.875, "step": 340 }, { "epoch": 2.8, "learning_rate": 1.82568359375e-05, "loss": 0.7843, "step": 360 }, { "epoch": 2.95, "learning_rate": 1.81591796875e-05, "loss": 0.7945, "step": 380 }, { "epoch": 3.11, "learning_rate": 1.80615234375e-05, "loss": 0.799, "step": 400 }, { "epoch": 3.27, "learning_rate": 1.79638671875e-05, "loss": 0.7623, "step": 420 }, { "epoch": 3.42, "learning_rate": 1.7866210937500002e-05, "loss": 0.7263, "step": 440 }, { "epoch": 3.58, "learning_rate": 1.7768554687500003e-05, "loss": 0.7779, "step": 460 }, { "epoch": 3.73, "learning_rate": 1.76708984375e-05, "loss": 0.695, "step": 480 }, { "epoch": 3.89, "learning_rate": 1.75732421875e-05, "loss": 0.7344, "step": 500 }, { "epoch": 4.04, "learning_rate": 1.74755859375e-05, "loss": 0.695, "step": 520 }, { "epoch": 4.2, "learning_rate": 1.7377929687500002e-05, "loss": 0.6504, "step": 540 }, { "epoch": 4.35, "learning_rate": 1.7280273437500002e-05, "loss": 0.6447, "step": 560 }, { "epoch": 4.51, "learning_rate": 1.7182617187500003e-05, "loss": 0.6931, "step": 580 }, { "epoch": 4.66, "learning_rate": 1.70849609375e-05, "loss": 0.6256, "step": 600 }, { "epoch": 4.82, "learning_rate": 1.69873046875e-05, "loss": 0.6132, "step": 620 }, { "epoch": 4.98, "learning_rate": 1.68896484375e-05, "loss": 0.6001, "step": 640 }, { "epoch": 5.13, "learning_rate": 1.6791992187500002e-05, "loss": 0.6176, "step": 660 }, { "epoch": 5.29, "learning_rate": 1.6694335937500002e-05, "loss": 0.5709, "step": 680 }, { "epoch": 5.44, "learning_rate": 1.65966796875e-05, "loss": 0.564, "step": 700 }, { "epoch": 5.6, "learning_rate": 1.64990234375e-05, "loss": 0.5969, "step": 720 }, { "epoch": 5.75, "learning_rate": 1.64013671875e-05, "loss": 0.5484, "step": 740 }, { "epoch": 5.91, "learning_rate": 1.63037109375e-05, "loss": 0.5667, "step": 760 }, { "epoch": 6.06, "learning_rate": 1.6206054687500002e-05, "loss": 0.5442, "step": 780 }, { "epoch": 6.22, "learning_rate": 1.6108398437500003e-05, "loss": 0.4857, "step": 800 }, { "epoch": 6.38, "learning_rate": 1.60107421875e-05, "loss": 0.5225, "step": 820 }, { "epoch": 6.53, "learning_rate": 1.59130859375e-05, "loss": 0.5457, "step": 840 }, { "epoch": 6.69, "learning_rate": 1.58154296875e-05, "loss": 0.5315, "step": 860 }, { "epoch": 6.84, "learning_rate": 1.57177734375e-05, "loss": 0.5345, "step": 880 }, { "epoch": 7.0, "learning_rate": 1.5620117187500002e-05, "loss": 0.5169, "step": 900 }, { "epoch": 7.15, "learning_rate": 1.5522460937500003e-05, "loss": 0.5115, "step": 920 }, { "epoch": 7.31, "learning_rate": 1.54248046875e-05, "loss": 0.4913, "step": 940 }, { "epoch": 7.46, "learning_rate": 1.53271484375e-05, "loss": 0.4868, "step": 960 }, { "epoch": 7.62, "learning_rate": 1.5229492187500001e-05, "loss": 0.5226, "step": 980 }, { "epoch": 7.77, "learning_rate": 1.5131835937500002e-05, "loss": 0.4061, "step": 1000 }, { "epoch": 7.93, "learning_rate": 1.50341796875e-05, "loss": 0.4488, "step": 1020 }, { "epoch": 8.09, "learning_rate": 1.4936523437500001e-05, "loss": 0.4443, "step": 1040 }, { "epoch": 8.24, "learning_rate": 1.4838867187500002e-05, "loss": 0.4646, "step": 1060 }, { "epoch": 8.4, "learning_rate": 1.47412109375e-05, "loss": 0.4513, "step": 1080 }, { "epoch": 8.55, "learning_rate": 1.4643554687500001e-05, "loss": 0.4394, "step": 1100 }, { "epoch": 8.71, "learning_rate": 1.4545898437500002e-05, "loss": 0.4337, "step": 1120 }, { "epoch": 8.86, "learning_rate": 1.44482421875e-05, "loss": 0.5037, "step": 1140 }, { "epoch": 9.02, "learning_rate": 1.4350585937500001e-05, "loss": 0.4209, "step": 1160 }, { "epoch": 9.17, "learning_rate": 1.42529296875e-05, "loss": 0.4073, "step": 1180 }, { "epoch": 9.33, "learning_rate": 1.4155273437500001e-05, "loss": 0.3879, "step": 1200 }, { "epoch": 9.48, "learning_rate": 1.4057617187500002e-05, "loss": 0.4434, "step": 1220 }, { "epoch": 9.64, "learning_rate": 1.39599609375e-05, "loss": 0.4062, "step": 1240 }, { "epoch": 9.8, "learning_rate": 1.3862304687500001e-05, "loss": 0.39, "step": 1260 }, { "epoch": 9.95, "learning_rate": 1.3764648437500002e-05, "loss": 0.4466, "step": 1280 }, { "epoch": 10.11, "learning_rate": 1.36669921875e-05, "loss": 0.4208, "step": 1300 }, { "epoch": 10.26, "learning_rate": 1.3569335937500001e-05, "loss": 0.3662, "step": 1320 }, { "epoch": 10.42, "learning_rate": 1.3471679687500002e-05, "loss": 0.4049, "step": 1340 }, { "epoch": 10.57, "learning_rate": 1.33740234375e-05, "loss": 0.3989, "step": 1360 }, { "epoch": 10.73, "learning_rate": 1.3276367187500001e-05, "loss": 0.3839, "step": 1380 }, { "epoch": 10.88, "learning_rate": 1.3178710937500002e-05, "loss": 0.4065, "step": 1400 }, { "epoch": 11.04, "learning_rate": 1.30810546875e-05, "loss": 0.3877, "step": 1420 }, { "epoch": 11.2, "learning_rate": 1.2983398437500001e-05, "loss": 0.4048, "step": 1440 }, { "epoch": 11.35, "learning_rate": 1.2885742187500002e-05, "loss": 0.3715, "step": 1460 }, { "epoch": 11.51, "learning_rate": 1.27880859375e-05, "loss": 0.3752, "step": 1480 }, { "epoch": 11.66, "learning_rate": 1.2690429687500001e-05, "loss": 0.3401, "step": 1500 }, { "epoch": 11.82, "learning_rate": 1.25927734375e-05, "loss": 0.3545, "step": 1520 }, { "epoch": 11.97, "learning_rate": 1.24951171875e-05, "loss": 0.3718, "step": 1540 }, { "epoch": 12.13, "learning_rate": 1.2397460937500001e-05, "loss": 0.3755, "step": 1560 }, { "epoch": 12.28, "learning_rate": 1.22998046875e-05, "loss": 0.3865, "step": 1580 }, { "epoch": 12.44, "learning_rate": 1.2202148437500001e-05, "loss": 0.3237, "step": 1600 }, { "epoch": 12.59, "learning_rate": 1.2104492187500001e-05, "loss": 0.3702, "step": 1620 }, { "epoch": 12.75, "learning_rate": 1.20068359375e-05, "loss": 0.3238, "step": 1640 }, { "epoch": 12.91, "learning_rate": 1.1909179687500001e-05, "loss": 0.3373, "step": 1660 }, { "epoch": 13.06, "learning_rate": 1.1811523437500002e-05, "loss": 0.3486, "step": 1680 }, { "epoch": 13.22, "learning_rate": 1.17138671875e-05, "loss": 0.362, "step": 1700 }, { "epoch": 13.37, "learning_rate": 1.1616210937500001e-05, "loss": 0.3257, "step": 1720 }, { "epoch": 13.53, "learning_rate": 1.1518554687500002e-05, "loss": 0.3414, "step": 1740 }, { "epoch": 13.68, "learning_rate": 1.14208984375e-05, "loss": 0.3121, "step": 1760 }, { "epoch": 13.84, "learning_rate": 1.1323242187500001e-05, "loss": 0.3598, "step": 1780 }, { "epoch": 13.99, "learning_rate": 1.1225585937500002e-05, "loss": 0.2906, "step": 1800 }, { "epoch": 14.15, "learning_rate": 1.11279296875e-05, "loss": 0.3356, "step": 1820 }, { "epoch": 14.31, "learning_rate": 1.1030273437500001e-05, "loss": 0.2991, "step": 1840 }, { "epoch": 14.46, "learning_rate": 1.09326171875e-05, "loss": 0.2987, "step": 1860 }, { "epoch": 14.62, "learning_rate": 1.08349609375e-05, "loss": 0.3249, "step": 1880 }, { "epoch": 14.77, "learning_rate": 1.0737304687500001e-05, "loss": 0.3425, "step": 1900 }, { "epoch": 14.93, "learning_rate": 1.06396484375e-05, "loss": 0.3398, "step": 1920 }, { "epoch": 15.08, "learning_rate": 1.05419921875e-05, "loss": 0.3018, "step": 1940 }, { "epoch": 15.24, "learning_rate": 1.0444335937500001e-05, "loss": 0.2475, "step": 1960 }, { "epoch": 15.39, "learning_rate": 1.03466796875e-05, "loss": 0.3507, "step": 1980 }, { "epoch": 15.55, "learning_rate": 1.02490234375e-05, "loss": 0.3084, "step": 2000 }, { "epoch": 15.7, "learning_rate": 1.0151367187500001e-05, "loss": 0.3212, "step": 2020 }, { "epoch": 15.86, "learning_rate": 1.00537109375e-05, "loss": 0.2831, "step": 2040 }, { "epoch": 16.02, "learning_rate": 9.956054687500001e-06, "loss": 0.3072, "step": 2060 }, { "epoch": 16.17, "learning_rate": 9.858398437500002e-06, "loss": 0.3293, "step": 2080 }, { "epoch": 16.33, "learning_rate": 9.7607421875e-06, "loss": 0.2738, "step": 2100 }, { "epoch": 16.48, "learning_rate": 9.663085937500001e-06, "loss": 0.3245, "step": 2120 }, { "epoch": 16.64, "learning_rate": 9.565429687500002e-06, "loss": 0.2846, "step": 2140 }, { "epoch": 16.79, "learning_rate": 9.4677734375e-06, "loss": 0.2906, "step": 2160 }, { "epoch": 16.95, "learning_rate": 9.370117187500001e-06, "loss": 0.263, "step": 2180 }, { "epoch": 17.1, "learning_rate": 9.2724609375e-06, "loss": 0.286, "step": 2200 }, { "epoch": 17.26, "learning_rate": 9.1748046875e-06, "loss": 0.3152, "step": 2220 }, { "epoch": 17.41, "learning_rate": 9.077148437500001e-06, "loss": 0.2811, "step": 2240 }, { "epoch": 17.57, "learning_rate": 8.9794921875e-06, "loss": 0.257, "step": 2260 }, { "epoch": 17.73, "learning_rate": 8.8818359375e-06, "loss": 0.267, "step": 2280 }, { "epoch": 17.88, "learning_rate": 8.784179687500001e-06, "loss": 0.3056, "step": 2300 }, { "epoch": 18.04, "learning_rate": 8.6865234375e-06, "loss": 0.2522, "step": 2320 }, { "epoch": 18.19, "learning_rate": 8.5888671875e-06, "loss": 0.2806, "step": 2340 }, { "epoch": 18.35, "learning_rate": 8.491210937500001e-06, "loss": 0.266, "step": 2360 }, { "epoch": 18.5, "learning_rate": 8.3935546875e-06, "loss": 0.288, "step": 2380 }, { "epoch": 18.66, "learning_rate": 8.2958984375e-06, "loss": 0.2712, "step": 2400 }, { "epoch": 18.81, "learning_rate": 8.198242187500001e-06, "loss": 0.3099, "step": 2420 }, { "epoch": 18.97, "learning_rate": 8.1005859375e-06, "loss": 0.2205, "step": 2440 }, { "epoch": 19.13, "learning_rate": 8.002929687500001e-06, "loss": 0.253, "step": 2460 }, { "epoch": 19.28, "learning_rate": 7.905273437500001e-06, "loss": 0.2885, "step": 2480 }, { "epoch": 19.44, "learning_rate": 7.8076171875e-06, "loss": 0.2326, "step": 2500 }, { "epoch": 19.59, "learning_rate": 7.709960937500001e-06, "loss": 0.255, "step": 2520 }, { "epoch": 19.75, "learning_rate": 7.612304687500001e-06, "loss": 0.2698, "step": 2540 }, { "epoch": 19.9, "learning_rate": 7.5146484375000004e-06, "loss": 0.2532, "step": 2560 }, { "epoch": 20.06, "learning_rate": 7.4169921875e-06, "loss": 0.2562, "step": 2580 }, { "epoch": 20.21, "learning_rate": 7.319335937500001e-06, "loss": 0.2717, "step": 2600 }, { "epoch": 20.37, "learning_rate": 7.2216796875000005e-06, "loss": 0.2652, "step": 2620 }, { "epoch": 20.52, "learning_rate": 7.1240234375e-06, "loss": 0.2654, "step": 2640 }, { "epoch": 20.68, "learning_rate": 7.026367187500001e-06, "loss": 0.2766, "step": 2660 }, { "epoch": 20.84, "learning_rate": 6.928710937500001e-06, "loss": 0.2246, "step": 2680 }, { "epoch": 20.99, "learning_rate": 6.8310546875e-06, "loss": 0.2546, "step": 2700 }, { "epoch": 21.15, "learning_rate": 6.733398437500001e-06, "loss": 0.2708, "step": 2720 }, { "epoch": 21.3, "learning_rate": 6.635742187500001e-06, "loss": 0.2385, "step": 2740 }, { "epoch": 21.46, "learning_rate": 6.5380859375e-06, "loss": 0.2271, "step": 2760 }, { "epoch": 21.61, "learning_rate": 6.4404296875e-06, "loss": 0.2874, "step": 2780 }, { "epoch": 21.77, "learning_rate": 6.342773437500001e-06, "loss": 0.2447, "step": 2800 }, { "epoch": 21.92, "learning_rate": 6.2451171875000005e-06, "loss": 0.2309, "step": 2820 }, { "epoch": 22.08, "learning_rate": 6.1474609375e-06, "loss": 0.2587, "step": 2840 }, { "epoch": 22.24, "learning_rate": 6.049804687500001e-06, "loss": 0.2303, "step": 2860 }, { "epoch": 22.39, "learning_rate": 5.952148437500001e-06, "loss": 0.2376, "step": 2880 }, { "epoch": 22.55, "learning_rate": 5.8544921875e-06, "loss": 0.2696, "step": 2900 }, { "epoch": 22.7, "learning_rate": 5.7568359375e-06, "loss": 0.2459, "step": 2920 }, { "epoch": 22.86, "learning_rate": 5.659179687500001e-06, "loss": 0.2032, "step": 2940 }, { "epoch": 23.01, "learning_rate": 5.5615234375e-06, "loss": 0.2491, "step": 2960 }, { "epoch": 23.17, "learning_rate": 5.4638671875e-06, "loss": 0.2395, "step": 2980 }, { "epoch": 23.32, "learning_rate": 5.366210937500001e-06, "loss": 0.2573, "step": 3000 }, { "epoch": 23.48, "learning_rate": 5.2685546875000005e-06, "loss": 0.2016, "step": 3020 }, { "epoch": 23.63, "learning_rate": 5.1708984375e-06, "loss": 0.2486, "step": 3040 }, { "epoch": 23.79, "learning_rate": 5.073242187500001e-06, "loss": 0.2209, "step": 3060 }, { "epoch": 23.95, "learning_rate": 4.9755859375000006e-06, "loss": 0.2373, "step": 3080 }, { "epoch": 24.1, "learning_rate": 4.8779296875e-06, "loss": 0.2433, "step": 3100 }, { "epoch": 24.26, "learning_rate": 4.7802734375e-06, "loss": 0.1869, "step": 3120 }, { "epoch": 24.41, "learning_rate": 4.682617187500001e-06, "loss": 0.2299, "step": 3140 }, { "epoch": 24.57, "learning_rate": 4.5849609375e-06, "loss": 0.2556, "step": 3160 }, { "epoch": 24.72, "learning_rate": 4.4873046875e-06, "loss": 0.2569, "step": 3180 }, { "epoch": 24.88, "learning_rate": 4.389648437500001e-06, "loss": 0.2298, "step": 3200 }, { "epoch": 25.03, "learning_rate": 4.2919921875000005e-06, "loss": 0.2368, "step": 3220 }, { "epoch": 25.19, "learning_rate": 4.1943359375e-06, "loss": 0.1928, "step": 3240 }, { "epoch": 25.34, "learning_rate": 4.0966796875e-06, "loss": 0.2253, "step": 3260 }, { "epoch": 25.5, "learning_rate": 3.9990234375000005e-06, "loss": 0.2369, "step": 3280 }, { "epoch": 25.66, "learning_rate": 3.9013671875e-06, "loss": 0.2117, "step": 3300 }, { "epoch": 25.81, "learning_rate": 3.8037109375000004e-06, "loss": 0.2407, "step": 3320 }, { "epoch": 25.97, "learning_rate": 3.7060546875e-06, "loss": 0.2451, "step": 3340 }, { "epoch": 26.12, "learning_rate": 3.6083984375000004e-06, "loss": 0.235, "step": 3360 }, { "epoch": 26.28, "learning_rate": 3.5107421875e-06, "loss": 0.2446, "step": 3380 }, { "epoch": 26.43, "learning_rate": 3.4130859375000003e-06, "loss": 0.1815, "step": 3400 }, { "epoch": 26.59, "learning_rate": 3.3154296875000004e-06, "loss": 0.2283, "step": 3420 }, { "epoch": 26.74, "learning_rate": 3.2177734375e-06, "loss": 0.2136, "step": 3440 }, { "epoch": 26.9, "learning_rate": 3.1201171875000003e-06, "loss": 0.2438, "step": 3460 }, { "epoch": 27.06, "learning_rate": 3.0224609375e-06, "loss": 0.2097, "step": 3480 }, { "epoch": 27.21, "learning_rate": 2.9248046875000003e-06, "loss": 0.2361, "step": 3500 } ], "max_steps": 4096, "num_train_epochs": 32, "total_flos": 4.5484736274432e+18, "trial_name": null, "trial_params": null }