{ "best_metric": null, "best_model_checkpoint": null, "epoch": 18.0, "global_step": 27360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1e-07, "loss": 6.0775, "step": 38 }, { "epoch": 0.05, "learning_rate": 1e-07, "loss": 6.0462, "step": 76 }, { "epoch": 0.07, "learning_rate": 1e-07, "loss": 6.0014, "step": 114 }, { "epoch": 0.1, "learning_rate": 1e-07, "loss": 5.9744, "step": 152 }, { "epoch": 0.12, "learning_rate": 1e-07, "loss": 5.886, "step": 190 }, { "epoch": 0.15, "learning_rate": 1e-07, "loss": 5.8752, "step": 228 }, { "epoch": 0.17, "learning_rate": 1e-07, "loss": 5.8586, "step": 266 }, { "epoch": 0.2, "learning_rate": 1e-07, "loss": 5.7714, "step": 304 }, { "epoch": 0.23, "learning_rate": 1e-07, "loss": 5.7406, "step": 342 }, { "epoch": 0.25, "learning_rate": 1e-07, "loss": 5.7096, "step": 380 }, { "epoch": 0.28, "learning_rate": 1e-07, "loss": 5.6967, "step": 418 }, { "epoch": 0.3, "learning_rate": 1e-07, "loss": 5.6248, "step": 456 }, { "epoch": 0.33, "learning_rate": 1e-07, "loss": 5.5632, "step": 494 }, { "epoch": 0.35, "learning_rate": 1e-07, "loss": 5.5463, "step": 532 }, { "epoch": 0.38, "learning_rate": 1e-07, "loss": 5.518, "step": 570 }, { "epoch": 0.4, "learning_rate": 1e-07, "loss": 5.4135, "step": 608 }, { "epoch": 0.42, "learning_rate": 1e-07, "loss": 5.4107, "step": 646 }, { "epoch": 0.45, "learning_rate": 1e-07, "loss": 5.3979, "step": 684 }, { "epoch": 0.47, "learning_rate": 1e-07, "loss": 5.3563, "step": 722 }, { "epoch": 0.5, "learning_rate": 1e-07, "loss": 5.3835, "step": 760 }, { "epoch": 0.53, "learning_rate": 1e-07, "loss": 5.3005, "step": 798 }, { "epoch": 0.55, "learning_rate": 1e-07, "loss": 5.2324, "step": 836 }, { "epoch": 0.57, "learning_rate": 1e-07, "loss": 5.2447, "step": 874 }, { "epoch": 0.6, "learning_rate": 1e-07, "loss": 5.1957, "step": 912 }, { "epoch": 0.62, "learning_rate": 1e-07, "loss": 5.1413, "step": 950 }, { "epoch": 0.65, "learning_rate": 1e-07, "loss": 5.1094, "step": 988 }, { "epoch": 0.68, "learning_rate": 1e-07, "loss": 5.0822, "step": 1026 }, { "epoch": 0.7, "learning_rate": 1e-07, "loss": 5.0826, "step": 1064 }, { "epoch": 0.72, "learning_rate": 1e-07, "loss": 5.0354, "step": 1102 }, { "epoch": 0.75, "learning_rate": 1e-07, "loss": 4.9899, "step": 1140 }, { "epoch": 0.78, "learning_rate": 1e-07, "loss": 5.0207, "step": 1178 }, { "epoch": 0.8, "learning_rate": 1e-07, "loss": 4.9249, "step": 1216 }, { "epoch": 0.82, "learning_rate": 1e-07, "loss": 4.9205, "step": 1254 }, { "epoch": 0.85, "learning_rate": 1e-07, "loss": 4.8866, "step": 1292 }, { "epoch": 0.88, "learning_rate": 1e-07, "loss": 4.9054, "step": 1330 }, { "epoch": 0.9, "learning_rate": 1e-07, "loss": 4.9223, "step": 1368 }, { "epoch": 0.93, "learning_rate": 1e-07, "loss": 4.8158, "step": 1406 }, { "epoch": 0.95, "learning_rate": 1e-07, "loss": 4.842, "step": 1444 }, { "epoch": 0.97, "learning_rate": 1e-07, "loss": 4.7924, "step": 1482 }, { "epoch": 1.0, "learning_rate": 1e-07, "loss": 4.7735, "step": 1520 }, { "epoch": 1.02, "learning_rate": 1e-07, "loss": 4.803, "step": 1558 }, { "epoch": 1.05, "learning_rate": 1e-07, "loss": 4.7616, "step": 1596 }, { "epoch": 1.07, "learning_rate": 1e-07, "loss": 4.7375, "step": 1634 }, { "epoch": 1.1, "learning_rate": 1e-07, "loss": 4.7046, "step": 1672 }, { "epoch": 1.12, "learning_rate": 1e-07, "loss": 4.6259, "step": 1710 }, { "epoch": 1.15, "learning_rate": 1e-07, "loss": 4.6591, "step": 1748 }, { "epoch": 1.18, "learning_rate": 1e-07, "loss": 4.6459, "step": 1786 }, { "epoch": 1.2, "learning_rate": 1e-07, "loss": 4.5775, "step": 1824 }, { "epoch": 1.23, "learning_rate": 1e-07, "loss": 4.5543, "step": 1862 }, { "epoch": 1.25, "learning_rate": 1e-07, "loss": 4.6039, "step": 1900 }, { "epoch": 1.27, "learning_rate": 1e-07, "loss": 4.5725, "step": 1938 }, { "epoch": 1.3, "learning_rate": 1e-07, "loss": 4.55, "step": 1976 }, { "epoch": 1.32, "learning_rate": 1e-07, "loss": 4.4842, "step": 2014 }, { "epoch": 1.35, "learning_rate": 1e-07, "loss": 4.4519, "step": 2052 }, { "epoch": 1.38, "learning_rate": 1e-07, "loss": 4.4428, "step": 2090 }, { "epoch": 1.4, "learning_rate": 1e-07, "loss": 4.4975, "step": 2128 }, { "epoch": 1.43, "learning_rate": 1e-07, "loss": 4.4482, "step": 2166 }, { "epoch": 1.45, "learning_rate": 1e-07, "loss": 4.439, "step": 2204 }, { "epoch": 1.48, "learning_rate": 1e-07, "loss": 4.4258, "step": 2242 }, { "epoch": 1.5, "learning_rate": 1e-07, "loss": 4.4161, "step": 2280 }, { "epoch": 1.52, "learning_rate": 1e-07, "loss": 4.4324, "step": 2318 }, { "epoch": 1.55, "learning_rate": 1e-07, "loss": 4.3676, "step": 2356 }, { "epoch": 1.57, "learning_rate": 1e-07, "loss": 4.35, "step": 2394 }, { "epoch": 1.6, "learning_rate": 1e-07, "loss": 4.3224, "step": 2432 }, { "epoch": 1.62, "learning_rate": 1e-07, "loss": 4.3432, "step": 2470 }, { "epoch": 1.65, "learning_rate": 1e-07, "loss": 4.2577, "step": 2508 }, { "epoch": 1.68, "learning_rate": 1e-07, "loss": 4.385, "step": 2546 }, { "epoch": 1.7, "learning_rate": 1e-07, "loss": 4.2489, "step": 2584 }, { "epoch": 1.73, "learning_rate": 1e-07, "loss": 4.2986, "step": 2622 }, { "epoch": 1.75, "learning_rate": 1e-07, "loss": 4.2423, "step": 2660 }, { "epoch": 1.77, "learning_rate": 1e-07, "loss": 4.2712, "step": 2698 }, { "epoch": 1.8, "learning_rate": 1e-07, "loss": 4.2693, "step": 2736 }, { "epoch": 1.82, "learning_rate": 1e-07, "loss": 4.1746, "step": 2774 }, { "epoch": 1.85, "learning_rate": 1e-07, "loss": 4.1345, "step": 2812 }, { "epoch": 1.88, "learning_rate": 1e-07, "loss": 4.0853, "step": 2850 }, { "epoch": 1.9, "learning_rate": 1e-07, "loss": 4.1348, "step": 2888 }, { "epoch": 1.93, "learning_rate": 1e-07, "loss": 4.1303, "step": 2926 }, { "epoch": 1.95, "learning_rate": 1e-07, "loss": 4.0622, "step": 2964 }, { "epoch": 1.98, "learning_rate": 1e-07, "loss": 4.0644, "step": 3002 }, { "epoch": 2.0, "learning_rate": 1e-07, "loss": 4.0578, "step": 3040 }, { "epoch": 2.02, "learning_rate": 1e-07, "loss": 3.9785, "step": 3078 }, { "epoch": 2.05, "learning_rate": 1e-07, "loss": 4.0323, "step": 3116 }, { "epoch": 2.08, "learning_rate": 1e-07, "loss": 4.0035, "step": 3154 }, { "epoch": 2.1, "learning_rate": 1e-07, "loss": 3.9697, "step": 3192 }, { "epoch": 2.12, "learning_rate": 1e-07, "loss": 3.9225, "step": 3230 }, { "epoch": 2.15, "learning_rate": 1e-07, "loss": 3.9822, "step": 3268 }, { "epoch": 2.17, "learning_rate": 1e-07, "loss": 3.9636, "step": 3306 }, { "epoch": 2.2, "learning_rate": 1e-07, "loss": 3.9574, "step": 3344 }, { "epoch": 2.23, "learning_rate": 1e-07, "loss": 3.8573, "step": 3382 }, { "epoch": 2.25, "learning_rate": 1e-07, "loss": 4.0107, "step": 3420 }, { "epoch": 2.27, "learning_rate": 1e-07, "loss": 3.8145, "step": 3458 }, { "epoch": 2.3, "learning_rate": 1e-07, "loss": 3.8424, "step": 3496 }, { "epoch": 2.33, "learning_rate": 1e-07, "loss": 3.8641, "step": 3534 }, { "epoch": 2.35, "learning_rate": 1e-07, "loss": 3.8606, "step": 3572 }, { "epoch": 2.38, "learning_rate": 1e-07, "loss": 3.7903, "step": 3610 }, { "epoch": 2.4, "learning_rate": 1e-07, "loss": 3.7378, "step": 3648 }, { "epoch": 2.42, "learning_rate": 1e-07, "loss": 3.7713, "step": 3686 }, { "epoch": 2.45, "learning_rate": 1e-07, "loss": 3.834, "step": 3724 }, { "epoch": 2.48, "learning_rate": 1e-07, "loss": 3.7785, "step": 3762 }, { "epoch": 2.5, "learning_rate": 1e-07, "loss": 3.8277, "step": 3800 }, { "epoch": 2.52, "learning_rate": 1e-07, "loss": 3.7287, "step": 3838 }, { "epoch": 2.55, "learning_rate": 1e-07, "loss": 3.7123, "step": 3876 }, { "epoch": 2.58, "learning_rate": 1e-07, "loss": 3.7185, "step": 3914 }, { "epoch": 2.6, "learning_rate": 1e-07, "loss": 3.6936, "step": 3952 }, { "epoch": 2.62, "learning_rate": 1e-07, "loss": 3.7462, "step": 3990 }, { "epoch": 2.65, "learning_rate": 1e-07, "loss": 3.6844, "step": 4028 }, { "epoch": 2.67, "learning_rate": 1e-07, "loss": 3.6709, "step": 4066 }, { "epoch": 2.7, "learning_rate": 1e-07, "loss": 3.6508, "step": 4104 }, { "epoch": 2.73, "learning_rate": 1e-07, "loss": 3.6247, "step": 4142 }, { "epoch": 2.75, "learning_rate": 1e-07, "loss": 3.6459, "step": 4180 }, { "epoch": 2.77, "learning_rate": 1e-07, "loss": 3.6169, "step": 4218 }, { "epoch": 2.8, "learning_rate": 1e-07, "loss": 3.6944, "step": 4256 }, { "epoch": 2.83, "learning_rate": 1e-07, "loss": 3.6458, "step": 4294 }, { "epoch": 2.85, "learning_rate": 1e-07, "loss": 3.5598, "step": 4332 }, { "epoch": 2.88, "learning_rate": 1e-07, "loss": 3.5221, "step": 4370 }, { "epoch": 2.9, "learning_rate": 1e-07, "loss": 3.6462, "step": 4408 }, { "epoch": 2.92, "learning_rate": 1e-07, "loss": 3.6155, "step": 4446 }, { "epoch": 2.95, "learning_rate": 1e-07, "loss": 3.5649, "step": 4484 }, { "epoch": 2.98, "learning_rate": 1e-07, "loss": 3.5088, "step": 4522 }, { "epoch": 3.0, "learning_rate": 1e-07, "loss": 3.6479, "step": 4560 }, { "epoch": 3.02, "learning_rate": 1e-07, "loss": 3.5495, "step": 4598 }, { "epoch": 3.05, "learning_rate": 1e-07, "loss": 3.4842, "step": 4636 }, { "epoch": 3.08, "learning_rate": 1e-07, "loss": 3.6004, "step": 4674 }, { "epoch": 3.1, "learning_rate": 1e-07, "loss": 3.4871, "step": 4712 }, { "epoch": 3.12, "learning_rate": 1e-07, "loss": 3.5237, "step": 4750 }, { "epoch": 3.15, "learning_rate": 1e-07, "loss": 3.5153, "step": 4788 }, { "epoch": 3.17, "learning_rate": 1e-07, "loss": 3.4592, "step": 4826 }, { "epoch": 3.2, "learning_rate": 1e-07, "loss": 3.5256, "step": 4864 }, { "epoch": 3.23, "learning_rate": 1e-07, "loss": 3.4191, "step": 4902 }, { "epoch": 3.25, "learning_rate": 1e-07, "loss": 3.4433, "step": 4940 }, { "epoch": 3.27, "learning_rate": 1e-07, "loss": 3.51, "step": 4978 }, { "epoch": 3.3, "learning_rate": 1e-07, "loss": 3.4299, "step": 5016 }, { "epoch": 3.33, "learning_rate": 1e-07, "loss": 3.4375, "step": 5054 }, { "epoch": 3.35, "learning_rate": 1e-07, "loss": 3.4766, "step": 5092 }, { "epoch": 3.38, "learning_rate": 1e-07, "loss": 3.4243, "step": 5130 }, { "epoch": 3.4, "learning_rate": 1e-07, "loss": 3.4521, "step": 5168 }, { "epoch": 3.42, "learning_rate": 1e-07, "loss": 3.4048, "step": 5206 }, { "epoch": 3.45, "learning_rate": 1e-07, "loss": 3.4903, "step": 5244 }, { "epoch": 3.48, "learning_rate": 1e-07, "loss": 3.3147, "step": 5282 }, { "epoch": 3.5, "learning_rate": 1e-07, "loss": 3.4223, "step": 5320 }, { "epoch": 3.52, "learning_rate": 1e-07, "loss": 3.3592, "step": 5358 }, { "epoch": 3.55, "learning_rate": 1e-07, "loss": 3.4287, "step": 5396 }, { "epoch": 3.58, "learning_rate": 1e-07, "loss": 3.2888, "step": 5434 }, { "epoch": 3.6, "learning_rate": 1e-07, "loss": 3.352, "step": 5472 }, { "epoch": 3.62, "learning_rate": 1e-07, "loss": 3.352, "step": 5510 }, { "epoch": 3.65, "learning_rate": 1e-07, "loss": 3.4211, "step": 5548 }, { "epoch": 3.67, "learning_rate": 1e-07, "loss": 3.4113, "step": 5586 }, { "epoch": 3.7, "learning_rate": 1e-07, "loss": 3.2818, "step": 5624 }, { "epoch": 3.73, "learning_rate": 1e-07, "loss": 3.368, "step": 5662 }, { "epoch": 3.75, "learning_rate": 1e-07, "loss": 3.3881, "step": 5700 }, { "epoch": 3.77, "learning_rate": 1e-07, "loss": 3.3362, "step": 5738 }, { "epoch": 3.8, "learning_rate": 1e-07, "loss": 3.3529, "step": 5776 }, { "epoch": 3.83, "learning_rate": 1e-07, "loss": 3.2348, "step": 5814 }, { "epoch": 3.85, "learning_rate": 1e-07, "loss": 3.3256, "step": 5852 }, { "epoch": 3.88, "learning_rate": 1e-07, "loss": 3.2392, "step": 5890 }, { "epoch": 3.9, "learning_rate": 1e-07, "loss": 3.2072, "step": 5928 }, { "epoch": 3.92, "learning_rate": 1e-07, "loss": 3.2969, "step": 5966 }, { "epoch": 3.95, "learning_rate": 1e-07, "loss": 3.2984, "step": 6004 }, { "epoch": 3.98, "learning_rate": 1e-07, "loss": 3.2254, "step": 6042 }, { "epoch": 4.0, "learning_rate": 1e-07, "loss": 3.3567, "step": 6080 }, { "epoch": 4.03, "learning_rate": 1e-07, "loss": 3.2742, "step": 6118 }, { "epoch": 4.05, "learning_rate": 1e-07, "loss": 3.3057, "step": 6156 }, { "epoch": 4.08, "learning_rate": 1e-07, "loss": 3.1336, "step": 6194 }, { "epoch": 4.1, "learning_rate": 1e-07, "loss": 3.2602, "step": 6232 }, { "epoch": 4.12, "learning_rate": 1e-07, "loss": 3.3183, "step": 6270 }, { "epoch": 4.15, "learning_rate": 1e-07, "loss": 3.2419, "step": 6308 }, { "epoch": 4.17, "learning_rate": 1e-07, "loss": 3.2496, "step": 6346 }, { "epoch": 4.2, "learning_rate": 1e-07, "loss": 3.2705, "step": 6384 }, { "epoch": 4.22, "learning_rate": 1e-07, "loss": 3.1646, "step": 6422 }, { "epoch": 4.25, "learning_rate": 1e-07, "loss": 3.1935, "step": 6460 }, { "epoch": 4.28, "learning_rate": 1e-07, "loss": 3.2376, "step": 6498 }, { "epoch": 4.3, "learning_rate": 1e-07, "loss": 3.1907, "step": 6536 }, { "epoch": 4.33, "learning_rate": 1e-07, "loss": 3.1895, "step": 6574 }, { "epoch": 4.35, "learning_rate": 1e-07, "loss": 3.2795, "step": 6612 }, { "epoch": 4.38, "learning_rate": 1e-07, "loss": 3.1587, "step": 6650 }, { "epoch": 4.4, "learning_rate": 1e-07, "loss": 3.2195, "step": 6688 }, { "epoch": 4.42, "learning_rate": 1e-07, "loss": 3.2117, "step": 6726 }, { "epoch": 4.45, "learning_rate": 1e-07, "loss": 3.1271, "step": 6764 }, { "epoch": 4.47, "learning_rate": 1e-07, "loss": 3.1652, "step": 6802 }, { "epoch": 4.5, "learning_rate": 1e-07, "loss": 3.1776, "step": 6840 }, { "epoch": 4.53, "learning_rate": 1e-07, "loss": 3.0865, "step": 6878 }, { "epoch": 4.55, "learning_rate": 1e-07, "loss": 3.1704, "step": 6916 }, { "epoch": 4.58, "learning_rate": 1e-07, "loss": 3.1358, "step": 6954 }, { "epoch": 4.6, "learning_rate": 1e-07, "loss": 3.1571, "step": 6992 }, { "epoch": 4.62, "learning_rate": 1e-07, "loss": 3.1921, "step": 7030 }, { "epoch": 4.65, "learning_rate": 1e-07, "loss": 3.1816, "step": 7068 }, { "epoch": 4.67, "learning_rate": 1e-07, "loss": 3.0454, "step": 7106 }, { "epoch": 4.7, "learning_rate": 1e-07, "loss": 3.0481, "step": 7144 }, { "epoch": 4.72, "learning_rate": 1e-07, "loss": 3.1092, "step": 7182 }, { "epoch": 4.75, "learning_rate": 1e-07, "loss": 3.1149, "step": 7220 }, { "epoch": 4.78, "learning_rate": 1e-07, "loss": 3.1654, "step": 7258 }, { "epoch": 4.8, "learning_rate": 1e-07, "loss": 3.1102, "step": 7296 }, { "epoch": 4.83, "learning_rate": 1e-07, "loss": 3.1573, "step": 7334 }, { "epoch": 4.85, "learning_rate": 1e-07, "loss": 3.0639, "step": 7372 }, { "epoch": 4.88, "learning_rate": 1e-07, "loss": 3.0567, "step": 7410 }, { "epoch": 4.9, "learning_rate": 1e-07, "loss": 3.0979, "step": 7448 }, { "epoch": 4.92, "learning_rate": 1e-07, "loss": 3.0217, "step": 7486 }, { "epoch": 4.95, "learning_rate": 1e-07, "loss": 3.0613, "step": 7524 }, { "epoch": 4.97, "learning_rate": 1e-07, "loss": 3.0275, "step": 7562 }, { "epoch": 5.0, "learning_rate": 1e-07, "loss": 3.0743, "step": 7600 }, { "epoch": 5.03, "learning_rate": 1e-07, "loss": 3.0545, "step": 7638 }, { "epoch": 5.05, "learning_rate": 1e-07, "loss": 3.0066, "step": 7676 }, { "epoch": 5.08, "learning_rate": 1e-07, "loss": 3.0473, "step": 7714 }, { "epoch": 5.1, "learning_rate": 1e-07, "loss": 3.0846, "step": 7752 }, { "epoch": 5.12, "learning_rate": 1e-07, "loss": 3.1315, "step": 7790 }, { "epoch": 5.15, "learning_rate": 1e-07, "loss": 2.9579, "step": 7828 }, { "epoch": 5.17, "learning_rate": 1e-07, "loss": 3.0408, "step": 7866 }, { "epoch": 5.2, "learning_rate": 1e-07, "loss": 3.0525, "step": 7904 }, { "epoch": 5.22, "learning_rate": 1e-07, "loss": 3.0084, "step": 7942 }, { "epoch": 5.25, "learning_rate": 1e-07, "loss": 3.0704, "step": 7980 }, { "epoch": 5.28, "learning_rate": 1e-07, "loss": 3.0312, "step": 8018 }, { "epoch": 5.3, "learning_rate": 1e-07, "loss": 2.9437, "step": 8056 }, { "epoch": 5.33, "learning_rate": 1e-07, "loss": 3.0565, "step": 8094 }, { "epoch": 5.35, "learning_rate": 1e-07, "loss": 2.9435, "step": 8132 }, { "epoch": 5.38, "learning_rate": 1e-07, "loss": 2.9414, "step": 8170 }, { "epoch": 5.4, "learning_rate": 1e-07, "loss": 3.0381, "step": 8208 }, { "epoch": 5.42, "learning_rate": 1e-07, "loss": 3.0162, "step": 8246 }, { "epoch": 5.45, "learning_rate": 1e-07, "loss": 3.0205, "step": 8284 }, { "epoch": 5.47, "learning_rate": 1e-07, "loss": 2.9968, "step": 8322 }, { "epoch": 5.5, "learning_rate": 1e-07, "loss": 3.0021, "step": 8360 }, { "epoch": 5.53, "learning_rate": 1e-07, "loss": 2.9997, "step": 8398 }, { "epoch": 5.55, "learning_rate": 1e-07, "loss": 3.0112, "step": 8436 }, { "epoch": 5.58, "learning_rate": 1e-07, "loss": 3.0385, "step": 8474 }, { "epoch": 5.6, "learning_rate": 1e-07, "loss": 2.9613, "step": 8512 }, { "epoch": 5.62, "learning_rate": 1e-07, "loss": 2.9484, "step": 8550 }, { "epoch": 5.65, "learning_rate": 1e-07, "loss": 2.979, "step": 8588 }, { "epoch": 5.67, "learning_rate": 1e-07, "loss": 2.9796, "step": 8626 }, { "epoch": 5.7, "learning_rate": 1e-07, "loss": 2.9801, "step": 8664 }, { "epoch": 5.72, "learning_rate": 1e-07, "loss": 3.0399, "step": 8702 }, { "epoch": 5.75, "learning_rate": 1e-07, "loss": 2.9223, "step": 8740 }, { "epoch": 5.78, "learning_rate": 1e-07, "loss": 3.0202, "step": 8778 }, { "epoch": 5.8, "learning_rate": 1e-07, "loss": 2.9057, "step": 8816 }, { "epoch": 5.83, "learning_rate": 1e-07, "loss": 2.9556, "step": 8854 }, { "epoch": 5.85, "learning_rate": 1e-07, "loss": 2.9582, "step": 8892 }, { "epoch": 5.88, "learning_rate": 1e-07, "loss": 2.8448, "step": 8930 }, { "epoch": 5.9, "learning_rate": 1e-07, "loss": 3.0643, "step": 8968 }, { "epoch": 5.92, "learning_rate": 1e-07, "loss": 2.8477, "step": 9006 }, { "epoch": 5.95, "learning_rate": 1e-07, "loss": 2.9684, "step": 9044 }, { "epoch": 5.97, "learning_rate": 1e-07, "loss": 2.9086, "step": 9082 }, { "epoch": 6.0, "learning_rate": 1e-07, "loss": 2.8426, "step": 9120 }, { "epoch": 6.03, "learning_rate": 1e-07, "loss": 2.903, "step": 9158 }, { "epoch": 6.05, "learning_rate": 1e-07, "loss": 2.8398, "step": 9196 }, { "epoch": 6.08, "learning_rate": 1e-07, "loss": 2.8839, "step": 9234 }, { "epoch": 6.1, "learning_rate": 1e-07, "loss": 2.9396, "step": 9272 }, { "epoch": 6.12, "learning_rate": 1e-07, "loss": 2.87, "step": 9310 }, { "epoch": 6.15, "learning_rate": 1e-07, "loss": 2.8619, "step": 9348 }, { "epoch": 6.17, "learning_rate": 1e-07, "loss": 2.8699, "step": 9386 }, { "epoch": 6.2, "learning_rate": 1e-07, "loss": 2.9366, "step": 9424 }, { "epoch": 6.22, "learning_rate": 1e-07, "loss": 2.895, "step": 9462 }, { "epoch": 6.25, "learning_rate": 1e-07, "loss": 2.8928, "step": 9500 }, { "epoch": 6.28, "learning_rate": 1e-07, "loss": 2.889, "step": 9538 }, { "epoch": 6.3, "learning_rate": 1e-07, "loss": 2.9291, "step": 9576 }, { "epoch": 6.33, "learning_rate": 1e-07, "loss": 2.8722, "step": 9614 }, { "epoch": 6.35, "learning_rate": 1e-07, "loss": 2.8362, "step": 9652 }, { "epoch": 6.38, "learning_rate": 1e-07, "loss": 2.8519, "step": 9690 }, { "epoch": 6.4, "learning_rate": 1e-07, "loss": 2.8364, "step": 9728 }, { "epoch": 6.42, "learning_rate": 1e-07, "loss": 2.9237, "step": 9766 }, { "epoch": 6.45, "learning_rate": 1e-07, "loss": 2.876, "step": 9804 }, { "epoch": 6.47, "learning_rate": 1e-07, "loss": 2.8436, "step": 9842 }, { "epoch": 6.5, "learning_rate": 1e-07, "loss": 2.8657, "step": 9880 }, { "epoch": 6.53, "learning_rate": 1e-07, "loss": 2.8283, "step": 9918 }, { "epoch": 6.55, "learning_rate": 1e-07, "loss": 2.8939, "step": 9956 }, { "epoch": 6.58, "learning_rate": 1e-07, "loss": 2.8956, "step": 9994 }, { "epoch": 6.6, "learning_rate": 1e-07, "loss": 2.8361, "step": 10032 }, { "epoch": 6.62, "learning_rate": 1e-07, "loss": 2.8309, "step": 10070 }, { "epoch": 6.65, "learning_rate": 1e-07, "loss": 2.8439, "step": 10108 }, { "epoch": 6.67, "learning_rate": 1e-07, "loss": 2.9308, "step": 10146 }, { "epoch": 6.7, "learning_rate": 1e-07, "loss": 2.8025, "step": 10184 }, { "epoch": 6.72, "learning_rate": 1e-07, "loss": 2.8796, "step": 10222 }, { "epoch": 6.75, "learning_rate": 1e-07, "loss": 2.8225, "step": 10260 }, { "epoch": 6.78, "learning_rate": 1e-07, "loss": 2.9773, "step": 10298 }, { "epoch": 6.8, "learning_rate": 1e-07, "loss": 2.8718, "step": 10336 }, { "epoch": 6.83, "learning_rate": 1e-07, "loss": 2.8718, "step": 10374 }, { "epoch": 6.85, "learning_rate": 1e-07, "loss": 2.8839, "step": 10412 }, { "epoch": 6.88, "learning_rate": 1e-07, "loss": 2.8241, "step": 10450 }, { "epoch": 6.9, "learning_rate": 1e-07, "loss": 2.811, "step": 10488 }, { "epoch": 6.92, "learning_rate": 1e-07, "loss": 2.8386, "step": 10526 }, { "epoch": 6.95, "learning_rate": 1e-07, "loss": 2.8042, "step": 10564 }, { "epoch": 6.97, "learning_rate": 1e-07, "loss": 2.7817, "step": 10602 }, { "epoch": 7.0, "learning_rate": 1e-07, "loss": 2.776, "step": 10640 }, { "epoch": 7.03, "learning_rate": 1e-07, "loss": 2.8606, "step": 10678 }, { "epoch": 7.05, "learning_rate": 1e-07, "loss": 2.8196, "step": 10716 }, { "epoch": 7.08, "learning_rate": 1e-07, "loss": 2.9032, "step": 10754 }, { "epoch": 7.1, "learning_rate": 1e-07, "loss": 2.8276, "step": 10792 }, { "epoch": 7.12, "learning_rate": 1e-07, "loss": 2.7629, "step": 10830 }, { "epoch": 7.15, "learning_rate": 1e-07, "loss": 2.7876, "step": 10868 }, { "epoch": 7.17, "learning_rate": 1e-07, "loss": 2.8237, "step": 10906 }, { "epoch": 7.2, "learning_rate": 1e-07, "loss": 2.8782, "step": 10944 }, { "epoch": 7.22, "learning_rate": 1e-07, "loss": 2.7644, "step": 10982 }, { "epoch": 7.25, "learning_rate": 1e-07, "loss": 2.7422, "step": 11020 }, { "epoch": 7.28, "learning_rate": 1e-07, "loss": 2.8169, "step": 11058 }, { "epoch": 7.3, "learning_rate": 1e-07, "loss": 2.8212, "step": 11096 }, { "epoch": 7.33, "learning_rate": 1e-07, "loss": 2.6941, "step": 11134 }, { "epoch": 7.35, "learning_rate": 1e-07, "loss": 2.8517, "step": 11172 }, { "epoch": 7.38, "learning_rate": 1e-07, "loss": 2.8385, "step": 11210 }, { "epoch": 7.4, "learning_rate": 1e-07, "loss": 2.755, "step": 11248 }, { "epoch": 7.42, "learning_rate": 1e-07, "loss": 2.7692, "step": 11286 }, { "epoch": 7.45, "learning_rate": 1e-07, "loss": 2.8242, "step": 11324 }, { "epoch": 7.47, "learning_rate": 1e-07, "loss": 2.7609, "step": 11362 }, { "epoch": 7.5, "learning_rate": 1e-07, "loss": 2.7633, "step": 11400 }, { "epoch": 7.53, "learning_rate": 1e-07, "loss": 2.7764, "step": 11438 }, { "epoch": 7.55, "learning_rate": 1e-07, "loss": 2.7956, "step": 11476 }, { "epoch": 7.58, "learning_rate": 1e-07, "loss": 2.7179, "step": 11514 }, { "epoch": 7.6, "learning_rate": 1e-07, "loss": 2.7766, "step": 11552 }, { "epoch": 7.62, "learning_rate": 1e-07, "loss": 2.8152, "step": 11590 }, { "epoch": 7.65, "learning_rate": 1e-07, "loss": 2.7367, "step": 11628 }, { "epoch": 7.67, "learning_rate": 1e-07, "loss": 2.7899, "step": 11666 }, { "epoch": 7.7, "learning_rate": 1e-07, "loss": 2.8211, "step": 11704 }, { "epoch": 7.72, "learning_rate": 1e-07, "loss": 2.7512, "step": 11742 }, { "epoch": 7.75, "learning_rate": 1e-07, "loss": 2.7689, "step": 11780 }, { "epoch": 7.78, "learning_rate": 1e-07, "loss": 2.7889, "step": 11818 }, { "epoch": 7.8, "learning_rate": 1e-07, "loss": 2.7053, "step": 11856 }, { "epoch": 7.83, "learning_rate": 1e-07, "loss": 2.7996, "step": 11894 }, { "epoch": 7.85, "learning_rate": 1e-07, "loss": 2.6374, "step": 11932 }, { "epoch": 7.88, "learning_rate": 1e-07, "loss": 2.7144, "step": 11970 }, { "epoch": 7.9, "learning_rate": 1e-07, "loss": 2.6325, "step": 12008 }, { "epoch": 7.92, "learning_rate": 1e-07, "loss": 2.7357, "step": 12046 }, { "epoch": 7.95, "learning_rate": 1e-07, "loss": 2.7822, "step": 12084 }, { "epoch": 7.97, "learning_rate": 1e-07, "loss": 2.7798, "step": 12122 }, { "epoch": 8.0, "learning_rate": 1e-07, "loss": 2.7708, "step": 12160 }, { "epoch": 8.03, "learning_rate": 1e-07, "loss": 2.7695, "step": 12198 }, { "epoch": 8.05, "learning_rate": 1e-07, "loss": 2.7509, "step": 12236 }, { "epoch": 8.07, "learning_rate": 1e-07, "loss": 2.6471, "step": 12274 }, { "epoch": 8.1, "learning_rate": 1e-07, "loss": 2.6833, "step": 12312 }, { "epoch": 8.12, "learning_rate": 1e-07, "loss": 2.737, "step": 12350 }, { "epoch": 8.15, "learning_rate": 1e-07, "loss": 2.7074, "step": 12388 }, { "epoch": 8.18, "learning_rate": 1e-07, "loss": 2.7347, "step": 12426 }, { "epoch": 8.2, "learning_rate": 1e-07, "loss": 2.6959, "step": 12464 }, { "epoch": 8.22, "learning_rate": 1e-07, "loss": 2.6782, "step": 12502 }, { "epoch": 8.25, "learning_rate": 1e-07, "loss": 2.727, "step": 12540 }, { "epoch": 8.28, "learning_rate": 1e-07, "loss": 2.6834, "step": 12578 }, { "epoch": 8.3, "learning_rate": 1e-07, "loss": 2.7919, "step": 12616 }, { "epoch": 8.32, "learning_rate": 1e-07, "loss": 2.7861, "step": 12654 }, { "epoch": 8.35, "learning_rate": 1e-07, "loss": 2.6642, "step": 12692 }, { "epoch": 8.38, "learning_rate": 1e-07, "loss": 2.7843, "step": 12730 }, { "epoch": 8.4, "learning_rate": 1e-07, "loss": 2.7456, "step": 12768 }, { "epoch": 8.43, "learning_rate": 1e-07, "loss": 2.749, "step": 12806 }, { "epoch": 8.45, "learning_rate": 1e-07, "loss": 2.6919, "step": 12844 }, { "epoch": 8.47, "learning_rate": 1e-07, "loss": 2.7122, "step": 12882 }, { "epoch": 8.5, "learning_rate": 1e-07, "loss": 2.6637, "step": 12920 }, { "epoch": 8.53, "learning_rate": 1e-07, "loss": 2.7101, "step": 12958 }, { "epoch": 8.55, "learning_rate": 1e-07, "loss": 2.716, "step": 12996 }, { "epoch": 8.57, "learning_rate": 1e-07, "loss": 2.698, "step": 13034 }, { "epoch": 8.6, "learning_rate": 1e-07, "loss": 2.7127, "step": 13072 }, { "epoch": 8.62, "learning_rate": 1e-07, "loss": 2.7368, "step": 13110 }, { "epoch": 8.65, "learning_rate": 1e-07, "loss": 2.6313, "step": 13148 }, { "epoch": 8.68, "learning_rate": 1e-07, "loss": 2.7304, "step": 13186 }, { "epoch": 8.7, "learning_rate": 1e-07, "loss": 2.7396, "step": 13224 }, { "epoch": 8.72, "learning_rate": 1e-07, "loss": 2.6746, "step": 13262 }, { "epoch": 8.75, "learning_rate": 1e-07, "loss": 2.6744, "step": 13300 }, { "epoch": 8.78, "learning_rate": 1e-07, "loss": 2.6228, "step": 13338 }, { "epoch": 8.8, "learning_rate": 1e-07, "loss": 2.7504, "step": 13376 }, { "epoch": 8.82, "learning_rate": 1e-07, "loss": 2.7281, "step": 13414 }, { "epoch": 8.85, "learning_rate": 1e-07, "loss": 2.7886, "step": 13452 }, { "epoch": 8.88, "learning_rate": 1e-07, "loss": 2.7505, "step": 13490 }, { "epoch": 8.9, "learning_rate": 1e-07, "loss": 2.7029, "step": 13528 }, { "epoch": 8.93, "learning_rate": 1e-07, "loss": 2.7687, "step": 13566 }, { "epoch": 8.95, "learning_rate": 1e-07, "loss": 2.6783, "step": 13604 }, { "epoch": 8.97, "learning_rate": 1e-07, "loss": 2.6507, "step": 13642 }, { "epoch": 9.0, "learning_rate": 1e-07, "loss": 2.673, "step": 13680 }, { "epoch": 9.03, "learning_rate": 1e-07, "loss": 2.6298, "step": 13718 }, { "epoch": 9.05, "learning_rate": 1e-07, "loss": 2.6612, "step": 13756 }, { "epoch": 9.07, "learning_rate": 1e-07, "loss": 2.7233, "step": 13794 }, { "epoch": 9.1, "learning_rate": 1e-07, "loss": 2.6386, "step": 13832 }, { "epoch": 9.12, "learning_rate": 1e-07, "loss": 2.6991, "step": 13870 }, { "epoch": 9.15, "learning_rate": 1e-07, "loss": 2.6629, "step": 13908 }, { "epoch": 9.18, "learning_rate": 1e-07, "loss": 2.6775, "step": 13946 }, { "epoch": 9.2, "learning_rate": 1e-07, "loss": 2.6804, "step": 13984 }, { "epoch": 9.22, "learning_rate": 1e-07, "loss": 2.6196, "step": 14022 }, { "epoch": 9.25, "learning_rate": 1e-07, "loss": 2.6489, "step": 14060 }, { "epoch": 9.28, "learning_rate": 1e-07, "loss": 2.7262, "step": 14098 }, { "epoch": 9.3, "learning_rate": 1e-07, "loss": 2.6845, "step": 14136 }, { "epoch": 9.32, "learning_rate": 1e-07, "loss": 2.6479, "step": 14174 }, { "epoch": 9.35, "learning_rate": 1e-07, "loss": 2.7273, "step": 14212 }, { "epoch": 9.38, "learning_rate": 1e-07, "loss": 2.6825, "step": 14250 }, { "epoch": 9.4, "learning_rate": 1e-07, "loss": 2.6207, "step": 14288 }, { "epoch": 9.43, "learning_rate": 1e-07, "loss": 2.6727, "step": 14326 }, { "epoch": 9.45, "learning_rate": 1e-07, "loss": 2.6411, "step": 14364 }, { "epoch": 9.47, "learning_rate": 1e-07, "loss": 2.7265, "step": 14402 }, { "epoch": 9.5, "learning_rate": 1e-07, "loss": 2.7001, "step": 14440 }, { "epoch": 9.53, "learning_rate": 1e-07, "loss": 2.699, "step": 14478 }, { "epoch": 9.55, "learning_rate": 1e-07, "loss": 2.612, "step": 14516 }, { "epoch": 9.57, "learning_rate": 1e-07, "loss": 2.6412, "step": 14554 }, { "epoch": 9.6, "learning_rate": 1e-07, "loss": 2.634, "step": 14592 }, { "epoch": 9.62, "learning_rate": 1e-07, "loss": 2.6596, "step": 14630 }, { "epoch": 9.65, "learning_rate": 1e-07, "loss": 2.702, "step": 14668 }, { "epoch": 9.68, "learning_rate": 1e-07, "loss": 2.692, "step": 14706 }, { "epoch": 9.7, "learning_rate": 1e-07, "loss": 2.6904, "step": 14744 }, { "epoch": 9.72, "learning_rate": 1e-07, "loss": 2.649, "step": 14782 }, { "epoch": 9.75, "learning_rate": 1e-07, "loss": 2.7208, "step": 14820 }, { "epoch": 9.78, "learning_rate": 1e-07, "loss": 2.6421, "step": 14858 }, { "epoch": 9.8, "learning_rate": 1e-07, "loss": 2.6062, "step": 14896 }, { "epoch": 9.82, "learning_rate": 1e-07, "loss": 2.6326, "step": 14934 }, { "epoch": 9.85, "learning_rate": 1e-07, "loss": 2.6574, "step": 14972 }, { "epoch": 9.88, "learning_rate": 1e-07, "loss": 2.6527, "step": 15010 }, { "epoch": 9.9, "learning_rate": 1e-07, "loss": 2.6796, "step": 15048 }, { "epoch": 9.93, "learning_rate": 1e-07, "loss": 2.543, "step": 15086 }, { "epoch": 9.95, "learning_rate": 1e-07, "loss": 2.6001, "step": 15124 }, { "epoch": 9.97, "learning_rate": 1e-07, "loss": 2.6147, "step": 15162 }, { "epoch": 10.0, "learning_rate": 1e-07, "loss": 2.6627, "step": 15200 }, { "epoch": 10.03, "learning_rate": 1e-07, "loss": 2.6809, "step": 15238 }, { "epoch": 10.05, "learning_rate": 1e-07, "loss": 2.6684, "step": 15276 }, { "epoch": 10.07, "learning_rate": 1e-07, "loss": 2.6266, "step": 15314 }, { "epoch": 10.1, "learning_rate": 1e-07, "loss": 2.6882, "step": 15352 }, { "epoch": 10.12, "learning_rate": 1e-07, "loss": 2.6337, "step": 15390 }, { "epoch": 10.15, "learning_rate": 1e-07, "loss": 2.6511, "step": 15428 }, { "epoch": 10.18, "learning_rate": 1e-07, "loss": 2.5565, "step": 15466 }, { "epoch": 10.2, "learning_rate": 1e-07, "loss": 2.6532, "step": 15504 }, { "epoch": 10.22, "learning_rate": 1e-07, "loss": 2.6808, "step": 15542 }, { "epoch": 10.25, "learning_rate": 1e-07, "loss": 2.6367, "step": 15580 }, { "epoch": 10.28, "learning_rate": 1e-07, "loss": 2.6816, "step": 15618 }, { "epoch": 10.3, "learning_rate": 1e-07, "loss": 2.5894, "step": 15656 }, { "epoch": 10.32, "learning_rate": 1e-07, "loss": 2.6045, "step": 15694 }, { "epoch": 10.35, "learning_rate": 1e-07, "loss": 2.6664, "step": 15732 }, { "epoch": 10.38, "learning_rate": 1e-07, "loss": 2.6359, "step": 15770 }, { "epoch": 10.4, "learning_rate": 1e-07, "loss": 2.595, "step": 15808 }, { "epoch": 10.43, "learning_rate": 1e-07, "loss": 2.7218, "step": 15846 }, { "epoch": 10.45, "learning_rate": 1e-07, "loss": 2.6015, "step": 15884 }, { "epoch": 10.47, "learning_rate": 1e-07, "loss": 2.6469, "step": 15922 }, { "epoch": 10.5, "learning_rate": 1e-07, "loss": 2.6632, "step": 15960 }, { "epoch": 10.53, "learning_rate": 1e-07, "loss": 2.6137, "step": 15998 }, { "epoch": 10.55, "learning_rate": 1e-07, "loss": 2.5723, "step": 16036 }, { "epoch": 10.57, "learning_rate": 1e-07, "loss": 2.5788, "step": 16074 }, { "epoch": 10.6, "learning_rate": 1e-07, "loss": 2.6213, "step": 16112 }, { "epoch": 10.62, "learning_rate": 1e-07, "loss": 2.6261, "step": 16150 }, { "epoch": 10.65, "learning_rate": 1e-07, "loss": 2.5937, "step": 16188 }, { "epoch": 10.68, "learning_rate": 1e-07, "loss": 2.5266, "step": 16226 }, { "epoch": 10.7, "learning_rate": 1e-07, "loss": 2.6844, "step": 16264 }, { "epoch": 10.72, "learning_rate": 1e-07, "loss": 2.5672, "step": 16302 }, { "epoch": 10.75, "learning_rate": 1e-07, "loss": 2.5905, "step": 16340 }, { "epoch": 10.78, "learning_rate": 1e-07, "loss": 2.5908, "step": 16378 }, { "epoch": 10.8, "learning_rate": 1e-07, "loss": 2.7097, "step": 16416 }, { "epoch": 10.82, "learning_rate": 1e-07, "loss": 2.6675, "step": 16454 }, { "epoch": 10.85, "learning_rate": 1e-07, "loss": 2.5913, "step": 16492 }, { "epoch": 10.88, "learning_rate": 1e-07, "loss": 2.6108, "step": 16530 }, { "epoch": 10.9, "learning_rate": 1e-07, "loss": 2.5413, "step": 16568 }, { "epoch": 10.93, "learning_rate": 1e-07, "loss": 2.6089, "step": 16606 }, { "epoch": 10.95, "learning_rate": 1e-07, "loss": 2.5889, "step": 16644 }, { "epoch": 10.97, "learning_rate": 1e-07, "loss": 2.5092, "step": 16682 }, { "epoch": 11.0, "learning_rate": 1e-07, "loss": 2.6164, "step": 16720 }, { "epoch": 11.03, "learning_rate": 1e-07, "loss": 2.6324, "step": 16758 }, { "epoch": 11.05, "learning_rate": 1e-07, "loss": 2.6398, "step": 16796 }, { "epoch": 11.07, "learning_rate": 1e-07, "loss": 2.6602, "step": 16834 }, { "epoch": 11.1, "learning_rate": 1e-07, "loss": 2.6362, "step": 16872 }, { "epoch": 11.12, "learning_rate": 1e-07, "loss": 2.574, "step": 16910 }, { "epoch": 11.15, "learning_rate": 1e-07, "loss": 2.5672, "step": 16948 }, { "epoch": 11.18, "learning_rate": 1e-07, "loss": 2.5726, "step": 16986 }, { "epoch": 11.2, "learning_rate": 1e-07, "loss": 2.6055, "step": 17024 }, { "epoch": 11.22, "learning_rate": 1e-07, "loss": 2.5427, "step": 17062 }, { "epoch": 11.25, "learning_rate": 1e-07, "loss": 2.5632, "step": 17100 }, { "epoch": 11.28, "learning_rate": 1e-07, "loss": 2.6167, "step": 17138 }, { "epoch": 11.3, "learning_rate": 1e-07, "loss": 2.584, "step": 17176 }, { "epoch": 11.32, "learning_rate": 1e-07, "loss": 2.6179, "step": 17214 }, { "epoch": 11.35, "learning_rate": 1e-07, "loss": 2.6232, "step": 17252 }, { "epoch": 11.38, "learning_rate": 1e-07, "loss": 2.6158, "step": 17290 }, { "epoch": 11.4, "learning_rate": 1e-07, "loss": 2.6115, "step": 17328 }, { "epoch": 11.43, "learning_rate": 1e-07, "loss": 2.5079, "step": 17366 }, { "epoch": 11.45, "learning_rate": 1e-07, "loss": 2.6525, "step": 17404 }, { "epoch": 11.47, "learning_rate": 1e-07, "loss": 2.5643, "step": 17442 }, { "epoch": 11.5, "learning_rate": 1e-07, "loss": 2.6215, "step": 17480 }, { "epoch": 11.53, "learning_rate": 1e-07, "loss": 2.5774, "step": 17518 }, { "epoch": 11.55, "learning_rate": 1e-07, "loss": 2.5696, "step": 17556 }, { "epoch": 11.57, "learning_rate": 1e-07, "loss": 2.5622, "step": 17594 }, { "epoch": 11.6, "learning_rate": 1e-07, "loss": 2.5572, "step": 17632 }, { "epoch": 11.62, "learning_rate": 1e-07, "loss": 2.5853, "step": 17670 }, { "epoch": 11.65, "learning_rate": 1e-07, "loss": 2.6487, "step": 17708 }, { "epoch": 11.68, "learning_rate": 1e-07, "loss": 2.5708, "step": 17746 }, { "epoch": 11.7, "learning_rate": 1e-07, "loss": 2.5757, "step": 17784 }, { "epoch": 11.72, "learning_rate": 1e-07, "loss": 2.5935, "step": 17822 }, { "epoch": 11.75, "learning_rate": 1e-07, "loss": 2.5212, "step": 17860 }, { "epoch": 11.78, "learning_rate": 1e-07, "loss": 2.6067, "step": 17898 }, { "epoch": 11.8, "learning_rate": 1e-07, "loss": 2.5712, "step": 17936 }, { "epoch": 11.82, "learning_rate": 1e-07, "loss": 2.5646, "step": 17974 }, { "epoch": 11.85, "learning_rate": 1e-07, "loss": 2.5789, "step": 18012 }, { "epoch": 11.88, "learning_rate": 1e-07, "loss": 2.6135, "step": 18050 }, { "epoch": 11.9, "learning_rate": 1e-07, "loss": 2.5586, "step": 18088 }, { "epoch": 11.93, "learning_rate": 1e-07, "loss": 2.5409, "step": 18126 }, { "epoch": 11.95, "learning_rate": 1e-07, "loss": 2.5607, "step": 18164 }, { "epoch": 11.97, "learning_rate": 1e-07, "loss": 2.5547, "step": 18202 }, { "epoch": 12.0, "learning_rate": 1e-07, "loss": 2.5457, "step": 18240 }, { "epoch": 12.03, "learning_rate": 1e-07, "loss": 2.5895, "step": 18278 }, { "epoch": 12.05, "learning_rate": 1e-07, "loss": 2.61, "step": 18316 }, { "epoch": 12.07, "learning_rate": 1e-07, "loss": 2.6503, "step": 18354 }, { "epoch": 12.1, "learning_rate": 1e-07, "loss": 2.5908, "step": 18392 }, { "epoch": 12.12, "learning_rate": 1e-07, "loss": 2.5728, "step": 18430 }, { "epoch": 12.15, "learning_rate": 1e-07, "loss": 2.5264, "step": 18468 }, { "epoch": 12.18, "learning_rate": 1e-07, "loss": 2.52, "step": 18506 }, { "epoch": 12.2, "learning_rate": 1e-07, "loss": 2.6832, "step": 18544 }, { "epoch": 12.22, "learning_rate": 1e-07, "loss": 2.6024, "step": 18582 }, { "epoch": 12.25, "learning_rate": 1e-07, "loss": 2.4557, "step": 18620 }, { "epoch": 12.28, "learning_rate": 1e-07, "loss": 2.6531, "step": 18658 }, { "epoch": 12.3, "learning_rate": 1e-07, "loss": 2.5052, "step": 18696 }, { "epoch": 12.32, "learning_rate": 1e-07, "loss": 2.4904, "step": 18734 }, { "epoch": 12.35, "learning_rate": 1e-07, "loss": 2.5336, "step": 18772 }, { "epoch": 12.38, "learning_rate": 1e-07, "loss": 2.5244, "step": 18810 }, { "epoch": 12.4, "learning_rate": 1e-07, "loss": 2.5641, "step": 18848 }, { "epoch": 12.43, "learning_rate": 1e-07, "loss": 2.5388, "step": 18886 }, { "epoch": 12.45, "learning_rate": 1e-07, "loss": 2.5346, "step": 18924 }, { "epoch": 12.47, "learning_rate": 1e-07, "loss": 2.5518, "step": 18962 }, { "epoch": 12.5, "learning_rate": 1e-07, "loss": 2.5202, "step": 19000 }, { "epoch": 12.53, "learning_rate": 1e-07, "loss": 2.5293, "step": 19038 }, { "epoch": 12.55, "learning_rate": 1e-07, "loss": 2.567, "step": 19076 }, { "epoch": 12.57, "learning_rate": 1e-07, "loss": 2.5357, "step": 19114 }, { "epoch": 12.6, "learning_rate": 1e-07, "loss": 2.5785, "step": 19152 }, { "epoch": 12.62, "learning_rate": 1e-07, "loss": 2.6327, "step": 19190 }, { "epoch": 12.65, "learning_rate": 1e-07, "loss": 2.6912, "step": 19228 }, { "epoch": 12.68, "learning_rate": 1e-07, "loss": 2.4824, "step": 19266 }, { "epoch": 12.7, "learning_rate": 1e-07, "loss": 2.5996, "step": 19304 }, { "epoch": 12.72, "learning_rate": 1e-07, "loss": 2.5246, "step": 19342 }, { "epoch": 12.75, "learning_rate": 1e-07, "loss": 2.5583, "step": 19380 }, { "epoch": 12.78, "learning_rate": 1e-07, "loss": 2.515, "step": 19418 }, { "epoch": 12.8, "learning_rate": 1e-07, "loss": 2.5677, "step": 19456 }, { "epoch": 12.82, "learning_rate": 1e-07, "loss": 2.5488, "step": 19494 }, { "epoch": 12.85, "learning_rate": 1e-07, "loss": 2.5562, "step": 19532 }, { "epoch": 12.88, "learning_rate": 1e-07, "loss": 2.544, "step": 19570 }, { "epoch": 12.9, "learning_rate": 1e-07, "loss": 2.5297, "step": 19608 }, { "epoch": 12.93, "learning_rate": 1e-07, "loss": 2.5091, "step": 19646 }, { "epoch": 12.95, "learning_rate": 1e-07, "loss": 2.5492, "step": 19684 }, { "epoch": 12.97, "learning_rate": 1e-07, "loss": 2.4553, "step": 19722 }, { "epoch": 13.0, "learning_rate": 1e-07, "loss": 2.5344, "step": 19760 }, { "epoch": 13.03, "learning_rate": 1e-07, "loss": 2.4807, "step": 19798 }, { "epoch": 13.05, "learning_rate": 1e-07, "loss": 2.4998, "step": 19836 }, { "epoch": 13.07, "learning_rate": 1e-07, "loss": 2.5033, "step": 19874 }, { "epoch": 13.1, "learning_rate": 1e-07, "loss": 2.5731, "step": 19912 }, { "epoch": 13.12, "learning_rate": 1e-07, "loss": 2.5362, "step": 19950 }, { "epoch": 13.15, "learning_rate": 1e-07, "loss": 2.5897, "step": 19988 }, { "epoch": 13.18, "learning_rate": 1e-07, "loss": 2.5419, "step": 20026 }, { "epoch": 13.2, "learning_rate": 1e-07, "loss": 2.5616, "step": 20064 }, { "epoch": 13.22, "learning_rate": 1e-07, "loss": 2.4845, "step": 20102 }, { "epoch": 13.25, "learning_rate": 1e-07, "loss": 2.5642, "step": 20140 }, { "epoch": 13.28, "learning_rate": 1e-07, "loss": 2.4496, "step": 20178 }, { "epoch": 13.3, "learning_rate": 1e-07, "loss": 2.5136, "step": 20216 }, { "epoch": 13.32, "learning_rate": 1e-07, "loss": 2.5947, "step": 20254 }, { "epoch": 13.35, "learning_rate": 1e-07, "loss": 2.5272, "step": 20292 }, { "epoch": 13.38, "learning_rate": 1e-07, "loss": 2.5801, "step": 20330 }, { "epoch": 13.4, "learning_rate": 1e-07, "loss": 2.4692, "step": 20368 }, { "epoch": 13.43, "learning_rate": 1e-07, "loss": 2.4849, "step": 20406 }, { "epoch": 13.45, "learning_rate": 1e-07, "loss": 2.5942, "step": 20444 }, { "epoch": 13.47, "learning_rate": 1e-07, "loss": 2.5341, "step": 20482 }, { "epoch": 13.5, "learning_rate": 1e-07, "loss": 2.4762, "step": 20520 }, { "epoch": 13.53, "learning_rate": 1e-07, "loss": 2.5744, "step": 20558 }, { "epoch": 13.55, "learning_rate": 1e-07, "loss": 2.5383, "step": 20596 }, { "epoch": 13.57, "learning_rate": 1e-07, "loss": 2.5769, "step": 20634 }, { "epoch": 13.6, "learning_rate": 1e-07, "loss": 2.5196, "step": 20672 }, { "epoch": 13.62, "learning_rate": 1e-07, "loss": 2.6015, "step": 20710 }, { "epoch": 13.65, "learning_rate": 1e-07, "loss": 2.5733, "step": 20748 }, { "epoch": 13.68, "learning_rate": 1e-07, "loss": 2.4808, "step": 20786 }, { "epoch": 13.7, "learning_rate": 1e-07, "loss": 2.4772, "step": 20824 }, { "epoch": 13.72, "learning_rate": 1e-07, "loss": 2.5212, "step": 20862 }, { "epoch": 13.75, "learning_rate": 1e-07, "loss": 2.5066, "step": 20900 }, { "epoch": 13.78, "learning_rate": 1e-07, "loss": 2.542, "step": 20938 }, { "epoch": 13.8, "learning_rate": 1e-07, "loss": 2.5422, "step": 20976 }, { "epoch": 13.82, "learning_rate": 1e-07, "loss": 2.5284, "step": 21014 }, { "epoch": 13.85, "learning_rate": 1e-07, "loss": 2.5472, "step": 21052 }, { "epoch": 13.88, "learning_rate": 1e-07, "loss": 2.6212, "step": 21090 }, { "epoch": 13.9, "learning_rate": 1e-07, "loss": 2.4717, "step": 21128 }, { "epoch": 13.93, "learning_rate": 1e-07, "loss": 2.5001, "step": 21166 }, { "epoch": 13.95, "learning_rate": 1e-07, "loss": 2.4892, "step": 21204 }, { "epoch": 13.97, "learning_rate": 1e-07, "loss": 2.4833, "step": 21242 }, { "epoch": 14.0, "learning_rate": 1e-07, "loss": 2.5363, "step": 21280 }, { "epoch": 14.03, "learning_rate": 1e-07, "loss": 2.5563, "step": 21318 }, { "epoch": 14.05, "learning_rate": 1e-07, "loss": 2.4392, "step": 21356 }, { "epoch": 14.07, "learning_rate": 1e-07, "loss": 2.4836, "step": 21394 }, { "epoch": 14.1, "learning_rate": 1e-07, "loss": 2.5705, "step": 21432 }, { "epoch": 14.12, "learning_rate": 1e-07, "loss": 2.494, "step": 21470 }, { "epoch": 14.15, "learning_rate": 1e-07, "loss": 2.5226, "step": 21508 }, { "epoch": 14.18, "learning_rate": 1e-07, "loss": 2.5026, "step": 21546 }, { "epoch": 14.2, "learning_rate": 1e-07, "loss": 2.4934, "step": 21584 }, { "epoch": 14.22, "learning_rate": 1e-07, "loss": 2.5057, "step": 21622 }, { "epoch": 14.25, "learning_rate": 1e-07, "loss": 2.4886, "step": 21660 }, { "epoch": 14.28, "learning_rate": 1e-07, "loss": 2.4816, "step": 21698 }, { "epoch": 14.3, "learning_rate": 1e-07, "loss": 2.4714, "step": 21736 }, { "epoch": 14.32, "learning_rate": 1e-07, "loss": 2.5459, "step": 21774 }, { "epoch": 14.35, "learning_rate": 1e-07, "loss": 2.549, "step": 21812 }, { "epoch": 14.38, "learning_rate": 1e-07, "loss": 2.4623, "step": 21850 }, { "epoch": 14.4, "learning_rate": 1e-07, "loss": 2.48, "step": 21888 }, { "epoch": 14.43, "learning_rate": 1e-07, "loss": 2.5529, "step": 21926 }, { "epoch": 14.45, "learning_rate": 1e-07, "loss": 2.5045, "step": 21964 }, { "epoch": 14.47, "learning_rate": 1e-07, "loss": 2.5599, "step": 22002 }, { "epoch": 14.5, "learning_rate": 1e-07, "loss": 2.4862, "step": 22040 }, { "epoch": 14.53, "learning_rate": 1e-07, "loss": 2.5145, "step": 22078 }, { "epoch": 14.55, "learning_rate": 1e-07, "loss": 2.4944, "step": 22116 }, { "epoch": 14.57, "learning_rate": 1e-07, "loss": 2.4581, "step": 22154 }, { "epoch": 14.6, "learning_rate": 1e-07, "loss": 2.552, "step": 22192 }, { "epoch": 14.62, "learning_rate": 1e-07, "loss": 2.5751, "step": 22230 }, { "epoch": 14.65, "learning_rate": 1e-07, "loss": 2.5383, "step": 22268 }, { "epoch": 14.68, "learning_rate": 1e-07, "loss": 2.5273, "step": 22306 }, { "epoch": 14.7, "learning_rate": 1e-07, "loss": 2.5118, "step": 22344 }, { "epoch": 14.72, "learning_rate": 1e-07, "loss": 2.5706, "step": 22382 }, { "epoch": 14.75, "learning_rate": 1e-07, "loss": 2.4765, "step": 22420 }, { "epoch": 14.78, "learning_rate": 1e-07, "loss": 2.4875, "step": 22458 }, { "epoch": 14.8, "learning_rate": 1e-07, "loss": 2.5111, "step": 22496 }, { "epoch": 14.82, "learning_rate": 1e-07, "loss": 2.4927, "step": 22534 }, { "epoch": 14.85, "learning_rate": 1e-07, "loss": 2.4156, "step": 22572 }, { "epoch": 14.88, "learning_rate": 1e-07, "loss": 2.5199, "step": 22610 }, { "epoch": 14.9, "learning_rate": 1e-07, "loss": 2.4974, "step": 22648 }, { "epoch": 14.93, "learning_rate": 1e-07, "loss": 2.5177, "step": 22686 }, { "epoch": 14.95, "learning_rate": 1e-07, "loss": 2.4438, "step": 22724 }, { "epoch": 14.97, "learning_rate": 1e-07, "loss": 2.4829, "step": 22762 }, { "epoch": 15.0, "learning_rate": 1e-07, "loss": 2.544, "step": 22800 }, { "epoch": 15.03, "learning_rate": 1e-07, "loss": 2.5036, "step": 22838 }, { "epoch": 15.05, "learning_rate": 1e-07, "loss": 2.4167, "step": 22876 }, { "epoch": 15.07, "learning_rate": 1e-07, "loss": 2.5017, "step": 22914 }, { "epoch": 15.1, "learning_rate": 1e-07, "loss": 2.4957, "step": 22952 }, { "epoch": 15.12, "learning_rate": 1e-07, "loss": 2.4176, "step": 22990 }, { "epoch": 15.15, "learning_rate": 1e-07, "loss": 2.535, "step": 23028 }, { "epoch": 15.18, "learning_rate": 1e-07, "loss": 2.5306, "step": 23066 }, { "epoch": 15.2, "learning_rate": 1e-07, "loss": 2.4867, "step": 23104 }, { "epoch": 15.22, "learning_rate": 1e-07, "loss": 2.5182, "step": 23142 }, { "epoch": 15.25, "learning_rate": 1e-07, "loss": 2.4443, "step": 23180 }, { "epoch": 15.28, "learning_rate": 1e-07, "loss": 2.4416, "step": 23218 }, { "epoch": 15.3, "learning_rate": 1e-07, "loss": 2.4665, "step": 23256 }, { "epoch": 15.32, "learning_rate": 1e-07, "loss": 2.4292, "step": 23294 }, { "epoch": 15.35, "learning_rate": 1e-07, "loss": 2.5607, "step": 23332 }, { "epoch": 15.38, "learning_rate": 1e-07, "loss": 2.4473, "step": 23370 }, { "epoch": 15.4, "learning_rate": 1e-07, "loss": 2.522, "step": 23408 }, { "epoch": 15.43, "learning_rate": 1e-07, "loss": 2.5633, "step": 23446 }, { "epoch": 15.45, "learning_rate": 1e-07, "loss": 2.4707, "step": 23484 }, { "epoch": 15.47, "learning_rate": 1e-07, "loss": 2.5822, "step": 23522 }, { "epoch": 15.5, "learning_rate": 1e-07, "loss": 2.4354, "step": 23560 }, { "epoch": 15.53, "learning_rate": 1e-07, "loss": 2.337, "step": 23598 }, { "epoch": 15.55, "learning_rate": 1e-07, "loss": 2.4321, "step": 23636 }, { "epoch": 15.57, "learning_rate": 1e-07, "loss": 2.453, "step": 23674 }, { "epoch": 15.6, "learning_rate": 1e-07, "loss": 2.4727, "step": 23712 }, { "epoch": 15.62, "learning_rate": 1e-07, "loss": 2.5775, "step": 23750 }, { "epoch": 15.65, "learning_rate": 1e-07, "loss": 2.4485, "step": 23788 }, { "epoch": 15.68, "learning_rate": 1e-07, "loss": 2.4905, "step": 23826 }, { "epoch": 15.7, "learning_rate": 1e-07, "loss": 2.486, "step": 23864 }, { "epoch": 15.72, "learning_rate": 1e-07, "loss": 2.4907, "step": 23902 }, { "epoch": 15.75, "learning_rate": 1e-07, "loss": 2.4584, "step": 23940 }, { "epoch": 15.78, "learning_rate": 1e-07, "loss": 2.5427, "step": 23978 }, { "epoch": 15.8, "learning_rate": 1e-07, "loss": 2.5468, "step": 24016 }, { "epoch": 15.82, "learning_rate": 1e-07, "loss": 2.5179, "step": 24054 }, { "epoch": 15.85, "learning_rate": 1e-07, "loss": 2.4527, "step": 24092 }, { "epoch": 15.88, "learning_rate": 1e-07, "loss": 2.4912, "step": 24130 }, { "epoch": 15.9, "learning_rate": 1e-07, "loss": 2.5246, "step": 24168 }, { "epoch": 15.93, "learning_rate": 1e-07, "loss": 2.4518, "step": 24206 }, { "epoch": 15.95, "learning_rate": 1e-07, "loss": 2.4702, "step": 24244 }, { "epoch": 15.97, "learning_rate": 1e-07, "loss": 2.4807, "step": 24282 }, { "epoch": 16.0, "learning_rate": 1e-07, "loss": 2.5016, "step": 24320 }, { "epoch": 16.02, "learning_rate": 1e-07, "loss": 2.4547, "step": 24358 }, { "epoch": 16.05, "learning_rate": 1e-07, "loss": 2.5046, "step": 24396 }, { "epoch": 16.07, "learning_rate": 1e-07, "loss": 2.4804, "step": 24434 }, { "epoch": 16.1, "learning_rate": 1e-07, "loss": 2.439, "step": 24472 }, { "epoch": 16.12, "learning_rate": 1e-07, "loss": 2.4343, "step": 24510 }, { "epoch": 16.15, "learning_rate": 1e-07, "loss": 2.5387, "step": 24548 }, { "epoch": 16.18, "learning_rate": 1e-07, "loss": 2.4459, "step": 24586 }, { "epoch": 16.2, "learning_rate": 1e-07, "loss": 2.4423, "step": 24624 }, { "epoch": 16.23, "learning_rate": 1e-07, "loss": 2.5521, "step": 24662 }, { "epoch": 16.25, "learning_rate": 1e-07, "loss": 2.5029, "step": 24700 }, { "epoch": 16.27, "learning_rate": 1e-07, "loss": 2.5005, "step": 24738 }, { "epoch": 16.3, "learning_rate": 1e-07, "loss": 2.4821, "step": 24776 }, { "epoch": 16.32, "learning_rate": 1e-07, "loss": 2.4868, "step": 24814 }, { "epoch": 16.35, "learning_rate": 1e-07, "loss": 2.4993, "step": 24852 }, { "epoch": 16.38, "learning_rate": 1e-07, "loss": 2.4132, "step": 24890 }, { "epoch": 16.4, "learning_rate": 1e-07, "loss": 2.4347, "step": 24928 }, { "epoch": 16.43, "learning_rate": 1e-07, "loss": 2.4667, "step": 24966 }, { "epoch": 16.45, "learning_rate": 1e-07, "loss": 2.4252, "step": 25004 }, { "epoch": 16.48, "learning_rate": 1e-07, "loss": 2.4267, "step": 25042 }, { "epoch": 16.5, "learning_rate": 1e-07, "loss": 2.4384, "step": 25080 }, { "epoch": 16.52, "learning_rate": 1e-07, "loss": 2.5273, "step": 25118 }, { "epoch": 16.55, "learning_rate": 1e-07, "loss": 2.5036, "step": 25156 }, { "epoch": 16.57, "learning_rate": 1e-07, "loss": 2.5159, "step": 25194 }, { "epoch": 16.6, "learning_rate": 1e-07, "loss": 2.4323, "step": 25232 }, { "epoch": 16.62, "learning_rate": 1e-07, "loss": 2.3885, "step": 25270 }, { "epoch": 16.65, "learning_rate": 1e-07, "loss": 2.4609, "step": 25308 }, { "epoch": 16.68, "learning_rate": 1e-07, "loss": 2.4227, "step": 25346 }, { "epoch": 16.7, "learning_rate": 1e-07, "loss": 2.5008, "step": 25384 }, { "epoch": 16.73, "learning_rate": 1e-07, "loss": 2.4119, "step": 25422 }, { "epoch": 16.75, "learning_rate": 1e-07, "loss": 2.4854, "step": 25460 }, { "epoch": 16.77, "learning_rate": 1e-07, "loss": 2.4073, "step": 25498 }, { "epoch": 16.8, "learning_rate": 1e-07, "loss": 2.4732, "step": 25536 }, { "epoch": 16.82, "learning_rate": 1e-07, "loss": 2.5109, "step": 25574 }, { "epoch": 16.85, "learning_rate": 1e-07, "loss": 2.4875, "step": 25612 }, { "epoch": 16.88, "learning_rate": 1e-07, "loss": 2.4114, "step": 25650 }, { "epoch": 16.9, "learning_rate": 1e-07, "loss": 2.5228, "step": 25688 }, { "epoch": 16.93, "learning_rate": 1e-07, "loss": 2.467, "step": 25726 }, { "epoch": 16.95, "learning_rate": 1e-07, "loss": 2.4497, "step": 25764 }, { "epoch": 16.98, "learning_rate": 1e-07, "loss": 2.4725, "step": 25802 }, { "epoch": 17.0, "learning_rate": 1e-07, "loss": 2.4368, "step": 25840 }, { "epoch": 17.02, "learning_rate": 1e-07, "loss": 2.4846, "step": 25878 }, { "epoch": 17.05, "learning_rate": 1e-07, "loss": 2.3977, "step": 25916 }, { "epoch": 17.07, "learning_rate": 1e-07, "loss": 2.4389, "step": 25954 }, { "epoch": 17.1, "learning_rate": 1e-07, "loss": 2.4865, "step": 25992 }, { "epoch": 17.12, "learning_rate": 1e-07, "loss": 2.3666, "step": 26030 }, { "epoch": 17.15, "learning_rate": 1e-07, "loss": 2.5045, "step": 26068 }, { "epoch": 17.18, "learning_rate": 1e-07, "loss": 2.4607, "step": 26106 }, { "epoch": 17.2, "learning_rate": 1e-07, "loss": 2.5089, "step": 26144 }, { "epoch": 17.23, "learning_rate": 1e-07, "loss": 2.4729, "step": 26182 }, { "epoch": 17.25, "learning_rate": 1e-07, "loss": 2.4228, "step": 26220 }, { "epoch": 17.27, "learning_rate": 1e-07, "loss": 2.4237, "step": 26258 }, { "epoch": 17.3, "learning_rate": 1e-07, "loss": 2.4593, "step": 26296 }, { "epoch": 17.32, "learning_rate": 1e-07, "loss": 2.4044, "step": 26334 }, { "epoch": 17.35, "learning_rate": 1e-07, "loss": 2.5488, "step": 26372 }, { "epoch": 17.38, "learning_rate": 1e-07, "loss": 2.3409, "step": 26410 }, { "epoch": 17.4, "learning_rate": 1e-07, "loss": 2.3963, "step": 26448 }, { "epoch": 17.43, "learning_rate": 1e-07, "loss": 2.4953, "step": 26486 }, { "epoch": 17.45, "learning_rate": 1e-07, "loss": 2.4604, "step": 26524 }, { "epoch": 17.48, "learning_rate": 1e-07, "loss": 2.4211, "step": 26562 }, { "epoch": 17.5, "learning_rate": 1e-07, "loss": 2.4791, "step": 26600 }, { "epoch": 17.52, "learning_rate": 1e-07, "loss": 2.442, "step": 26638 }, { "epoch": 17.55, "learning_rate": 1e-07, "loss": 2.4315, "step": 26676 }, { "epoch": 17.57, "learning_rate": 1e-07, "loss": 2.4432, "step": 26714 }, { "epoch": 17.6, "learning_rate": 1e-07, "loss": 2.4599, "step": 26752 }, { "epoch": 17.62, "learning_rate": 1e-07, "loss": 2.5794, "step": 26790 }, { "epoch": 17.65, "learning_rate": 1e-07, "loss": 2.4263, "step": 26828 }, { "epoch": 17.68, "learning_rate": 1e-07, "loss": 2.4788, "step": 26866 }, { "epoch": 17.7, "learning_rate": 1e-07, "loss": 2.4702, "step": 26904 }, { "epoch": 17.73, "learning_rate": 1e-07, "loss": 2.4099, "step": 26942 }, { "epoch": 17.75, "learning_rate": 1e-07, "loss": 2.3706, "step": 26980 }, { "epoch": 17.77, "learning_rate": 1e-07, "loss": 2.3648, "step": 27018 }, { "epoch": 17.8, "learning_rate": 1e-07, "loss": 2.4577, "step": 27056 }, { "epoch": 17.82, "learning_rate": 1e-07, "loss": 2.454, "step": 27094 }, { "epoch": 17.85, "learning_rate": 1e-07, "loss": 2.4376, "step": 27132 }, { "epoch": 17.88, "learning_rate": 1e-07, "loss": 2.4718, "step": 27170 }, { "epoch": 17.9, "learning_rate": 1e-07, "loss": 2.4469, "step": 27208 }, { "epoch": 17.93, "learning_rate": 1e-07, "loss": 2.4622, "step": 27246 }, { "epoch": 17.95, "learning_rate": 1e-07, "loss": 2.4444, "step": 27284 }, { "epoch": 17.98, "learning_rate": 1e-07, "loss": 2.4595, "step": 27322 }, { "epoch": 18.0, "learning_rate": 1e-07, "loss": 2.4624, "step": 27360 }, { "epoch": 18.0, "step": 27360, "total_flos": 4.063318154359603e+17, "train_loss": 3.0161131058520043, "train_runtime": 38619.075, "train_samples_per_second": 1.417, "train_steps_per_second": 0.708 } ], "max_steps": 27360, "num_train_epochs": 18, "total_flos": 4.063318154359603e+17, "trial_name": null, "trial_params": null }