|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.0, |
|
"global_step": 27360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-07, |
|
"loss": 6.0775, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-07, |
|
"loss": 6.0462, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-07, |
|
"loss": 6.0014, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-07, |
|
"loss": 5.9744, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-07, |
|
"loss": 5.886, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-07, |
|
"loss": 5.8752, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-07, |
|
"loss": 5.8586, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-07, |
|
"loss": 5.7714, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-07, |
|
"loss": 5.7406, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-07, |
|
"loss": 5.7096, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-07, |
|
"loss": 5.6967, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-07, |
|
"loss": 5.6248, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-07, |
|
"loss": 5.5632, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-07, |
|
"loss": 5.5463, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-07, |
|
"loss": 5.518, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-07, |
|
"loss": 5.4135, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-07, |
|
"loss": 5.4107, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-07, |
|
"loss": 5.3979, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-07, |
|
"loss": 5.3563, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-07, |
|
"loss": 5.3835, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-07, |
|
"loss": 5.3005, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-07, |
|
"loss": 5.2324, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-07, |
|
"loss": 5.2447, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-07, |
|
"loss": 5.1957, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-07, |
|
"loss": 5.1413, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-07, |
|
"loss": 5.1094, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-07, |
|
"loss": 5.0822, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-07, |
|
"loss": 5.0826, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-07, |
|
"loss": 5.0354, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-07, |
|
"loss": 4.9899, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-07, |
|
"loss": 5.0207, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-07, |
|
"loss": 4.9249, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-07, |
|
"loss": 4.9205, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-07, |
|
"loss": 4.8866, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-07, |
|
"loss": 4.9054, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-07, |
|
"loss": 4.9223, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-07, |
|
"loss": 4.8158, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-07, |
|
"loss": 4.842, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-07, |
|
"loss": 4.7924, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1e-07, |
|
"loss": 4.7735, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1e-07, |
|
"loss": 4.803, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1e-07, |
|
"loss": 4.7616, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1e-07, |
|
"loss": 4.7375, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1e-07, |
|
"loss": 4.7046, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1e-07, |
|
"loss": 4.6259, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1e-07, |
|
"loss": 4.6591, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1e-07, |
|
"loss": 4.6459, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1e-07, |
|
"loss": 4.5775, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1e-07, |
|
"loss": 4.5543, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1e-07, |
|
"loss": 4.6039, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1e-07, |
|
"loss": 4.5725, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1e-07, |
|
"loss": 4.55, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4842, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4519, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4428, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4975, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4482, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1e-07, |
|
"loss": 4.439, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4258, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4161, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1e-07, |
|
"loss": 4.4324, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1e-07, |
|
"loss": 4.3676, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1e-07, |
|
"loss": 4.35, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1e-07, |
|
"loss": 4.3224, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1e-07, |
|
"loss": 4.3432, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2577, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1e-07, |
|
"loss": 4.385, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2489, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2986, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2423, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2712, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1e-07, |
|
"loss": 4.2693, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1e-07, |
|
"loss": 4.1746, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1e-07, |
|
"loss": 4.1345, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0853, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1e-07, |
|
"loss": 4.1348, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1e-07, |
|
"loss": 4.1303, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0622, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0644, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0578, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9785, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0323, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0035, |
|
"step": 3154 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9697, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9225, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9822, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9636, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1e-07, |
|
"loss": 3.9574, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8573, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1e-07, |
|
"loss": 4.0107, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8145, |
|
"step": 3458 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8424, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8641, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8606, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7903, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7378, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7713, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1e-07, |
|
"loss": 3.834, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7785, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1e-07, |
|
"loss": 3.8277, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7287, |
|
"step": 3838 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7123, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7185, |
|
"step": 3914 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6936, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1e-07, |
|
"loss": 3.7462, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6844, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6709, |
|
"step": 4066 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6508, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6247, |
|
"step": 4142 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6459, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6169, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6944, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6458, |
|
"step": 4294 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5598, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5221, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6462, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6155, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5649, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5088, |
|
"step": 4522 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6479, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5495, |
|
"step": 4598 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4842, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1e-07, |
|
"loss": 3.6004, |
|
"step": 4674 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4871, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5237, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5153, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4592, |
|
"step": 4826 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1e-07, |
|
"loss": 3.5256, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4191, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4433, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1e-07, |
|
"loss": 3.51, |
|
"step": 4978 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4299, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4375, |
|
"step": 5054 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4766, |
|
"step": 5092 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4243, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4521, |
|
"step": 5168 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4048, |
|
"step": 5206 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4903, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3147, |
|
"step": 5282 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4223, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3592, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4287, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2888, |
|
"step": 5434 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1e-07, |
|
"loss": 3.352, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1e-07, |
|
"loss": 3.352, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4211, |
|
"step": 5548 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1e-07, |
|
"loss": 3.4113, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2818, |
|
"step": 5624 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1e-07, |
|
"loss": 3.368, |
|
"step": 5662 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3881, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3362, |
|
"step": 5738 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3529, |
|
"step": 5776 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2348, |
|
"step": 5814 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3256, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2392, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2072, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2969, |
|
"step": 5966 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2984, |
|
"step": 6004 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2254, |
|
"step": 6042 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3567, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2742, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3057, |
|
"step": 6156 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1336, |
|
"step": 6194 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2602, |
|
"step": 6232 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1e-07, |
|
"loss": 3.3183, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2419, |
|
"step": 6308 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2496, |
|
"step": 6346 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2705, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1646, |
|
"step": 6422 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1935, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2376, |
|
"step": 6498 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1907, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1895, |
|
"step": 6574 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2795, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1587, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2195, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1e-07, |
|
"loss": 3.2117, |
|
"step": 6726 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1271, |
|
"step": 6764 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1652, |
|
"step": 6802 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1776, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0865, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1704, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1358, |
|
"step": 6954 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1571, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1921, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1816, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0454, |
|
"step": 7106 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0481, |
|
"step": 7144 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1092, |
|
"step": 7182 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1149, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1654, |
|
"step": 7258 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1102, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1573, |
|
"step": 7334 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0639, |
|
"step": 7372 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0567, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0979, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0217, |
|
"step": 7486 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0613, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0275, |
|
"step": 7562 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0743, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0545, |
|
"step": 7638 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0066, |
|
"step": 7676 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0473, |
|
"step": 7714 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0846, |
|
"step": 7752 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 1e-07, |
|
"loss": 3.1315, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9579, |
|
"step": 7828 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0408, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0525, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0084, |
|
"step": 7942 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0704, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0312, |
|
"step": 8018 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9437, |
|
"step": 8056 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0565, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9435, |
|
"step": 8132 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9414, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0381, |
|
"step": 8208 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0162, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0205, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9968, |
|
"step": 8322 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0021, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9997, |
|
"step": 8398 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0112, |
|
"step": 8436 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0385, |
|
"step": 8474 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9613, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9484, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.979, |
|
"step": 8588 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9796, |
|
"step": 8626 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9801, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0399, |
|
"step": 8702 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9223, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0202, |
|
"step": 8778 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9057, |
|
"step": 8816 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9556, |
|
"step": 8854 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9582, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8448, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 1e-07, |
|
"loss": 3.0643, |
|
"step": 8968 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8477, |
|
"step": 9006 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9684, |
|
"step": 9044 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9086, |
|
"step": 9082 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8426, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.903, |
|
"step": 9158 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8398, |
|
"step": 9196 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8839, |
|
"step": 9234 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9396, |
|
"step": 9272 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.87, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8619, |
|
"step": 9348 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8699, |
|
"step": 9386 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9366, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.895, |
|
"step": 9462 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8928, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.889, |
|
"step": 9538 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9291, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8722, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8362, |
|
"step": 9652 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8519, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8364, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9237, |
|
"step": 9766 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.876, |
|
"step": 9804 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8436, |
|
"step": 9842 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8657, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8283, |
|
"step": 9918 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8939, |
|
"step": 9956 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8956, |
|
"step": 9994 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8361, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8309, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8439, |
|
"step": 10108 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9308, |
|
"step": 10146 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8025, |
|
"step": 10184 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8796, |
|
"step": 10222 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8225, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9773, |
|
"step": 10298 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8718, |
|
"step": 10336 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8718, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8839, |
|
"step": 10412 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8241, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.811, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8386, |
|
"step": 10526 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8042, |
|
"step": 10564 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7817, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.776, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8606, |
|
"step": 10678 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8196, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1e-07, |
|
"loss": 2.9032, |
|
"step": 10754 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8276, |
|
"step": 10792 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7629, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7876, |
|
"step": 10868 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8237, |
|
"step": 10906 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8782, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7644, |
|
"step": 10982 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7422, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8169, |
|
"step": 11058 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8212, |
|
"step": 11096 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6941, |
|
"step": 11134 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8517, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8385, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.755, |
|
"step": 11248 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7692, |
|
"step": 11286 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8242, |
|
"step": 11324 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7609, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7633, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7764, |
|
"step": 11438 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7956, |
|
"step": 11476 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7179, |
|
"step": 11514 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7766, |
|
"step": 11552 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8152, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7367, |
|
"step": 11628 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7899, |
|
"step": 11666 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.8211, |
|
"step": 11704 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7512, |
|
"step": 11742 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7689, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7889, |
|
"step": 11818 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7053, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7996, |
|
"step": 11894 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6374, |
|
"step": 11932 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7144, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6325, |
|
"step": 12008 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7357, |
|
"step": 12046 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7822, |
|
"step": 12084 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7798, |
|
"step": 12122 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7708, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7695, |
|
"step": 12198 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7509, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6471, |
|
"step": 12274 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6833, |
|
"step": 12312 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.737, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7074, |
|
"step": 12388 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7347, |
|
"step": 12426 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6959, |
|
"step": 12464 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6782, |
|
"step": 12502 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.727, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6834, |
|
"step": 12578 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7919, |
|
"step": 12616 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7861, |
|
"step": 12654 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6642, |
|
"step": 12692 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7843, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7456, |
|
"step": 12768 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.749, |
|
"step": 12806 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6919, |
|
"step": 12844 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7122, |
|
"step": 12882 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6637, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7101, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.716, |
|
"step": 12996 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.698, |
|
"step": 13034 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7127, |
|
"step": 13072 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7368, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6313, |
|
"step": 13148 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7304, |
|
"step": 13186 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7396, |
|
"step": 13224 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6746, |
|
"step": 13262 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6744, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6228, |
|
"step": 13338 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7504, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7281, |
|
"step": 13414 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7886, |
|
"step": 13452 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7505, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7029, |
|
"step": 13528 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7687, |
|
"step": 13566 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6783, |
|
"step": 13604 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6507, |
|
"step": 13642 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.673, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6298, |
|
"step": 13718 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6612, |
|
"step": 13756 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7233, |
|
"step": 13794 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6386, |
|
"step": 13832 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6991, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6629, |
|
"step": 13908 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6775, |
|
"step": 13946 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6804, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6196, |
|
"step": 14022 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6489, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7262, |
|
"step": 14098 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6845, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6479, |
|
"step": 14174 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7273, |
|
"step": 14212 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6825, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6207, |
|
"step": 14288 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6727, |
|
"step": 14326 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6411, |
|
"step": 14364 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7265, |
|
"step": 14402 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7001, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.699, |
|
"step": 14478 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.612, |
|
"step": 14516 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6412, |
|
"step": 14554 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.634, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6596, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.702, |
|
"step": 14668 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.692, |
|
"step": 14706 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6904, |
|
"step": 14744 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.649, |
|
"step": 14782 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7208, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6421, |
|
"step": 14858 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6062, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6326, |
|
"step": 14934 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6574, |
|
"step": 14972 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6527, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6796, |
|
"step": 15048 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.543, |
|
"step": 15086 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6001, |
|
"step": 15124 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6147, |
|
"step": 15162 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6627, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6809, |
|
"step": 15238 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6684, |
|
"step": 15276 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6266, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6882, |
|
"step": 15352 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6337, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6511, |
|
"step": 15428 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5565, |
|
"step": 15466 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6532, |
|
"step": 15504 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6808, |
|
"step": 15542 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6367, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6816, |
|
"step": 15618 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5894, |
|
"step": 15656 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6045, |
|
"step": 15694 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6664, |
|
"step": 15732 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6359, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.595, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7218, |
|
"step": 15846 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6015, |
|
"step": 15884 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6469, |
|
"step": 15922 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6632, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6137, |
|
"step": 15998 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5723, |
|
"step": 16036 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5788, |
|
"step": 16074 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6213, |
|
"step": 16112 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6261, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5937, |
|
"step": 16188 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5266, |
|
"step": 16226 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6844, |
|
"step": 16264 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5672, |
|
"step": 16302 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5905, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5908, |
|
"step": 16378 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.7097, |
|
"step": 16416 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6675, |
|
"step": 16454 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5913, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6108, |
|
"step": 16530 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5413, |
|
"step": 16568 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6089, |
|
"step": 16606 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5889, |
|
"step": 16644 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5092, |
|
"step": 16682 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6164, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6324, |
|
"step": 16758 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6398, |
|
"step": 16796 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6602, |
|
"step": 16834 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6362, |
|
"step": 16872 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.574, |
|
"step": 16910 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5672, |
|
"step": 16948 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5726, |
|
"step": 16986 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6055, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5427, |
|
"step": 17062 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5632, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6167, |
|
"step": 17138 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.584, |
|
"step": 17176 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6179, |
|
"step": 17214 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6232, |
|
"step": 17252 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6158, |
|
"step": 17290 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6115, |
|
"step": 17328 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5079, |
|
"step": 17366 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6525, |
|
"step": 17404 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5643, |
|
"step": 17442 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6215, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5774, |
|
"step": 17518 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5696, |
|
"step": 17556 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5622, |
|
"step": 17594 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5572, |
|
"step": 17632 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5853, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6487, |
|
"step": 17708 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5708, |
|
"step": 17746 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5757, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5935, |
|
"step": 17822 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5212, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6067, |
|
"step": 17898 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5712, |
|
"step": 17936 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5646, |
|
"step": 17974 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5789, |
|
"step": 18012 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6135, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5586, |
|
"step": 18088 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5409, |
|
"step": 18126 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5607, |
|
"step": 18164 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5547, |
|
"step": 18202 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5457, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5895, |
|
"step": 18278 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.61, |
|
"step": 18316 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6503, |
|
"step": 18354 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5908, |
|
"step": 18392 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5728, |
|
"step": 18430 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5264, |
|
"step": 18468 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.52, |
|
"step": 18506 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6832, |
|
"step": 18544 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6024, |
|
"step": 18582 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4557, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6531, |
|
"step": 18658 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5052, |
|
"step": 18696 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4904, |
|
"step": 18734 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5336, |
|
"step": 18772 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5244, |
|
"step": 18810 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5641, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5388, |
|
"step": 18886 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5346, |
|
"step": 18924 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5518, |
|
"step": 18962 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5202, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5293, |
|
"step": 19038 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.567, |
|
"step": 19076 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5357, |
|
"step": 19114 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5785, |
|
"step": 19152 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6327, |
|
"step": 19190 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6912, |
|
"step": 19228 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4824, |
|
"step": 19266 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5996, |
|
"step": 19304 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5246, |
|
"step": 19342 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5583, |
|
"step": 19380 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.515, |
|
"step": 19418 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5677, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5488, |
|
"step": 19494 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5562, |
|
"step": 19532 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.544, |
|
"step": 19570 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5297, |
|
"step": 19608 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5091, |
|
"step": 19646 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5492, |
|
"step": 19684 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4553, |
|
"step": 19722 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5344, |
|
"step": 19760 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4807, |
|
"step": 19798 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4998, |
|
"step": 19836 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5033, |
|
"step": 19874 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5731, |
|
"step": 19912 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5362, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5897, |
|
"step": 19988 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5419, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5616, |
|
"step": 20064 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4845, |
|
"step": 20102 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5642, |
|
"step": 20140 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4496, |
|
"step": 20178 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5136, |
|
"step": 20216 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5947, |
|
"step": 20254 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5272, |
|
"step": 20292 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5801, |
|
"step": 20330 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4692, |
|
"step": 20368 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4849, |
|
"step": 20406 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5942, |
|
"step": 20444 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5341, |
|
"step": 20482 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4762, |
|
"step": 20520 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5744, |
|
"step": 20558 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5383, |
|
"step": 20596 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5769, |
|
"step": 20634 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5196, |
|
"step": 20672 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6015, |
|
"step": 20710 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5733, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4808, |
|
"step": 20786 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4772, |
|
"step": 20824 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5212, |
|
"step": 20862 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5066, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.542, |
|
"step": 20938 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5422, |
|
"step": 20976 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5284, |
|
"step": 21014 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5472, |
|
"step": 21052 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.6212, |
|
"step": 21090 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4717, |
|
"step": 21128 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5001, |
|
"step": 21166 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4892, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4833, |
|
"step": 21242 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5363, |
|
"step": 21280 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5563, |
|
"step": 21318 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4392, |
|
"step": 21356 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4836, |
|
"step": 21394 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5705, |
|
"step": 21432 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.494, |
|
"step": 21470 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5226, |
|
"step": 21508 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5026, |
|
"step": 21546 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4934, |
|
"step": 21584 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5057, |
|
"step": 21622 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4886, |
|
"step": 21660 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4816, |
|
"step": 21698 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4714, |
|
"step": 21736 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5459, |
|
"step": 21774 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.549, |
|
"step": 21812 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4623, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.48, |
|
"step": 21888 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5529, |
|
"step": 21926 |
|
}, |
|
{ |
|
"epoch": 14.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5045, |
|
"step": 21964 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5599, |
|
"step": 22002 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4862, |
|
"step": 22040 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5145, |
|
"step": 22078 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4944, |
|
"step": 22116 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4581, |
|
"step": 22154 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.552, |
|
"step": 22192 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5751, |
|
"step": 22230 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5383, |
|
"step": 22268 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5273, |
|
"step": 22306 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5118, |
|
"step": 22344 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5706, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4765, |
|
"step": 22420 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4875, |
|
"step": 22458 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5111, |
|
"step": 22496 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4927, |
|
"step": 22534 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4156, |
|
"step": 22572 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5199, |
|
"step": 22610 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4974, |
|
"step": 22648 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5177, |
|
"step": 22686 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4438, |
|
"step": 22724 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4829, |
|
"step": 22762 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.544, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5036, |
|
"step": 22838 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4167, |
|
"step": 22876 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5017, |
|
"step": 22914 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4957, |
|
"step": 22952 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4176, |
|
"step": 22990 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.535, |
|
"step": 23028 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5306, |
|
"step": 23066 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4867, |
|
"step": 23104 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5182, |
|
"step": 23142 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4443, |
|
"step": 23180 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4416, |
|
"step": 23218 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4665, |
|
"step": 23256 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4292, |
|
"step": 23294 |
|
}, |
|
{ |
|
"epoch": 15.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5607, |
|
"step": 23332 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4473, |
|
"step": 23370 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.522, |
|
"step": 23408 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5633, |
|
"step": 23446 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4707, |
|
"step": 23484 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5822, |
|
"step": 23522 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4354, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 1e-07, |
|
"loss": 2.337, |
|
"step": 23598 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4321, |
|
"step": 23636 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.453, |
|
"step": 23674 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4727, |
|
"step": 23712 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5775, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4485, |
|
"step": 23788 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4905, |
|
"step": 23826 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.486, |
|
"step": 23864 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4907, |
|
"step": 23902 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4584, |
|
"step": 23940 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5427, |
|
"step": 23978 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5468, |
|
"step": 24016 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5179, |
|
"step": 24054 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4527, |
|
"step": 24092 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4912, |
|
"step": 24130 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5246, |
|
"step": 24168 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4518, |
|
"step": 24206 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4702, |
|
"step": 24244 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4807, |
|
"step": 24282 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5016, |
|
"step": 24320 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4547, |
|
"step": 24358 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5046, |
|
"step": 24396 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4804, |
|
"step": 24434 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.439, |
|
"step": 24472 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4343, |
|
"step": 24510 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5387, |
|
"step": 24548 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4459, |
|
"step": 24586 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4423, |
|
"step": 24624 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5521, |
|
"step": 24662 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5029, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5005, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4821, |
|
"step": 24776 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4868, |
|
"step": 24814 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4993, |
|
"step": 24852 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4132, |
|
"step": 24890 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4347, |
|
"step": 24928 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4667, |
|
"step": 24966 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4252, |
|
"step": 25004 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4267, |
|
"step": 25042 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4384, |
|
"step": 25080 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5273, |
|
"step": 25118 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5036, |
|
"step": 25156 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5159, |
|
"step": 25194 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4323, |
|
"step": 25232 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3885, |
|
"step": 25270 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4609, |
|
"step": 25308 |
|
}, |
|
{ |
|
"epoch": 16.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4227, |
|
"step": 25346 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5008, |
|
"step": 25384 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4119, |
|
"step": 25422 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4854, |
|
"step": 25460 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4073, |
|
"step": 25498 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4732, |
|
"step": 25536 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5109, |
|
"step": 25574 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4875, |
|
"step": 25612 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4114, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5228, |
|
"step": 25688 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.467, |
|
"step": 25726 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4497, |
|
"step": 25764 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4725, |
|
"step": 25802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4368, |
|
"step": 25840 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4846, |
|
"step": 25878 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3977, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4389, |
|
"step": 25954 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4865, |
|
"step": 25992 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3666, |
|
"step": 26030 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5045, |
|
"step": 26068 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4607, |
|
"step": 26106 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5089, |
|
"step": 26144 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4729, |
|
"step": 26182 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4228, |
|
"step": 26220 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4237, |
|
"step": 26258 |
|
}, |
|
{ |
|
"epoch": 17.3, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4593, |
|
"step": 26296 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4044, |
|
"step": 26334 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5488, |
|
"step": 26372 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3409, |
|
"step": 26410 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3963, |
|
"step": 26448 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4953, |
|
"step": 26486 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4604, |
|
"step": 26524 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4211, |
|
"step": 26562 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4791, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 1e-07, |
|
"loss": 2.442, |
|
"step": 26638 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4315, |
|
"step": 26676 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4432, |
|
"step": 26714 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4599, |
|
"step": 26752 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 1e-07, |
|
"loss": 2.5794, |
|
"step": 26790 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4263, |
|
"step": 26828 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4788, |
|
"step": 26866 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4702, |
|
"step": 26904 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4099, |
|
"step": 26942 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3706, |
|
"step": 26980 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3648, |
|
"step": 27018 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4577, |
|
"step": 27056 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 1e-07, |
|
"loss": 2.454, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4376, |
|
"step": 27132 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4718, |
|
"step": 27170 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4469, |
|
"step": 27208 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4622, |
|
"step": 27246 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4444, |
|
"step": 27284 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4595, |
|
"step": 27322 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1e-07, |
|
"loss": 2.4624, |
|
"step": 27360 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"step": 27360, |
|
"total_flos": 4.063318154359603e+17, |
|
"train_loss": 3.0161131058520043, |
|
"train_runtime": 38619.075, |
|
"train_samples_per_second": 1.417, |
|
"train_steps_per_second": 0.708 |
|
} |
|
], |
|
"max_steps": 27360, |
|
"num_train_epochs": 18, |
|
"total_flos": 4.063318154359603e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|