{ "best_metric": null, "best_model_checkpoint": null, "epoch": 191.96969696969697, "global_step": 1536, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0, "loss": 3.6611, "step": 1 }, { "epoch": 0.48, "learning_rate": 0, "loss": 3.6226, "step": 4 }, { "epoch": 0.97, "learning_rate": 0.0001, "loss": 2.845, "step": 8 }, { "epoch": 1.48, "learning_rate": 0.0001, "loss": 1.6871, "step": 12 }, { "epoch": 1.97, "learning_rate": 0.0001, "loss": 0.6514, "step": 16 }, { "epoch": 2.48, "learning_rate": 0.0001, "loss": 0.4597, "step": 20 }, { "epoch": 2.97, "learning_rate": 0.0001, "loss": 0.346, "step": 24 }, { "epoch": 3.48, "learning_rate": 0.0001, "loss": 0.3093, "step": 28 }, { "epoch": 3.97, "learning_rate": 0.0001, "loss": 0.2487, "step": 32 }, { "epoch": 4.48, "learning_rate": 0.0001, "loss": 0.2344, "step": 36 }, { "epoch": 4.97, "learning_rate": 0.0001, "loss": 0.2048, "step": 40 }, { "epoch": 5.48, "learning_rate": 0.0001, "loss": 0.1839, "step": 44 }, { "epoch": 5.97, "learning_rate": 0.0001, "loss": 0.1714, "step": 48 }, { "epoch": 6.48, "learning_rate": 0.0001, "loss": 0.1553, "step": 52 }, { "epoch": 6.97, "learning_rate": 0.0001, "loss": 0.1323, "step": 56 }, { "epoch": 7.48, "learning_rate": 0.0001, "loss": 0.1243, "step": 60 }, { "epoch": 7.97, "learning_rate": 0.0001, "loss": 0.113, "step": 64 }, { "epoch": 7.97, "eval_exact_match": 0.5004965243296922, "eval_exec": 0.5292949354518371, "eval_loss": 0.184326171875, "eval_runtime": 2467.6178, "eval_samples_per_second": 0.527, "step": 64 }, { "epoch": 8.48, "learning_rate": 0.0001, "loss": 0.1088, "step": 68 }, { "epoch": 8.97, "learning_rate": 0.0001, "loss": 0.1037, "step": 72 }, { "epoch": 9.48, "learning_rate": 0.0001, "loss": 0.1025, "step": 76 }, { "epoch": 9.97, "learning_rate": 0.0001, "loss": 0.0885, "step": 80 }, { "epoch": 10.48, "learning_rate": 0.0001, "loss": 0.0879, "step": 84 }, { "epoch": 10.97, "learning_rate": 0.0001, "loss": 0.0847, "step": 88 }, { "epoch": 11.48, "learning_rate": 0.0001, "loss": 0.0817, "step": 92 }, { "epoch": 11.97, "learning_rate": 0.0001, "loss": 0.0731, "step": 96 }, { "epoch": 12.48, "learning_rate": 0.0001, "loss": 0.0701, "step": 100 }, { "epoch": 12.97, "learning_rate": 0.0001, "loss": 0.0697, "step": 104 }, { "epoch": 13.48, "learning_rate": 0.0001, "loss": 0.0632, "step": 108 }, { "epoch": 13.97, "learning_rate": 0.0001, "loss": 0.0581, "step": 112 }, { "epoch": 14.48, "learning_rate": 0.0001, "loss": 0.0603, "step": 116 }, { "epoch": 14.97, "learning_rate": 0.0001, "loss": 0.0575, "step": 120 }, { "epoch": 15.48, "learning_rate": 0.0001, "loss": 0.0595, "step": 124 }, { "epoch": 15.97, "learning_rate": 0.0001, "loss": 0.0513, "step": 128 }, { "epoch": 15.97, "eval_exact_match": 0.5431976166832175, "eval_exec": 0.5809334657398213, "eval_loss": 0.19482421875, "eval_runtime": 2620.0652, "eval_samples_per_second": 0.496, "step": 128 }, { "epoch": 16.48, "learning_rate": 0.0001, "loss": 0.0516, "step": 132 }, { "epoch": 16.97, "learning_rate": 0.0001, "loss": 0.0515, "step": 136 }, { "epoch": 17.48, "learning_rate": 0.0001, "loss": 0.0467, "step": 140 }, { "epoch": 17.97, "learning_rate": 0.0001, "loss": 0.0443, "step": 144 }, { "epoch": 18.48, "learning_rate": 0.0001, "loss": 0.0439, "step": 148 }, { "epoch": 18.97, "learning_rate": 0.0001, "loss": 0.0393, "step": 152 }, { "epoch": 19.48, "learning_rate": 0.0001, "loss": 0.0407, "step": 156 }, { "epoch": 19.97, "learning_rate": 0.0001, "loss": 0.0396, "step": 160 }, { "epoch": 20.48, "learning_rate": 0.0001, "loss": 0.0381, "step": 164 }, { "epoch": 20.97, "learning_rate": 0.0001, "loss": 0.0358, "step": 168 }, { "epoch": 21.48, "learning_rate": 0.0001, "loss": 0.0304, "step": 172 }, { "epoch": 21.97, "learning_rate": 0.0001, "loss": 0.0318, "step": 176 }, { "epoch": 22.48, "learning_rate": 0.0001, "loss": 0.0331, "step": 180 }, { "epoch": 22.97, "learning_rate": 0.0001, "loss": 0.0305, "step": 184 }, { "epoch": 23.48, "learning_rate": 0.0001, "loss": 0.0344, "step": 188 }, { "epoch": 23.97, "learning_rate": 0.0001, "loss": 0.0302, "step": 192 }, { "epoch": 23.97, "eval_exact_match": 0.5431976166832175, "eval_exec": 0.5789473684210527, "eval_loss": 0.2381591796875, "eval_runtime": 2596.5804, "eval_samples_per_second": 0.501, "step": 192 }, { "epoch": 24.48, "learning_rate": 0.0001, "loss": 0.0315, "step": 196 }, { "epoch": 24.97, "learning_rate": 0.0001, "loss": 0.0253, "step": 200 }, { "epoch": 25.48, "learning_rate": 0.0001, "loss": 0.0277, "step": 204 }, { "epoch": 25.97, "learning_rate": 0.0001, "loss": 0.0252, "step": 208 }, { "epoch": 26.48, "learning_rate": 0.0001, "loss": 0.0266, "step": 212 }, { "epoch": 26.97, "learning_rate": 0.0001, "loss": 0.0243, "step": 216 }, { "epoch": 27.48, "learning_rate": 0.0001, "loss": 0.0224, "step": 220 }, { "epoch": 27.97, "learning_rate": 0.0001, "loss": 0.024, "step": 224 }, { "epoch": 28.48, "learning_rate": 0.0001, "loss": 0.022, "step": 228 }, { "epoch": 28.97, "learning_rate": 0.0001, "loss": 0.02, "step": 232 }, { "epoch": 29.48, "learning_rate": 0.0001, "loss": 0.0191, "step": 236 }, { "epoch": 29.97, "learning_rate": 0.0001, "loss": 0.0213, "step": 240 }, { "epoch": 30.48, "learning_rate": 0.0001, "loss": 0.0195, "step": 244 }, { "epoch": 30.97, "learning_rate": 0.0001, "loss": 0.0191, "step": 248 }, { "epoch": 31.48, "learning_rate": 0.0001, "loss": 0.0185, "step": 252 }, { "epoch": 31.97, "learning_rate": 0.0001, "loss": 0.0163, "step": 256 }, { "epoch": 31.97, "eval_exact_match": 0.5521350546176763, "eval_exec": 0.5858987090367428, "eval_loss": 0.265625, "eval_runtime": 2579.5067, "eval_samples_per_second": 0.504, "step": 256 }, { "epoch": 32.48, "learning_rate": 0.0001, "loss": 0.0162, "step": 260 }, { "epoch": 32.97, "learning_rate": 0.0001, "loss": 0.0164, "step": 264 }, { "epoch": 33.48, "learning_rate": 0.0001, "loss": 0.0172, "step": 268 }, { "epoch": 33.97, "learning_rate": 0.0001, "loss": 0.0157, "step": 272 }, { "epoch": 34.48, "learning_rate": 0.0001, "loss": 0.0175, "step": 276 }, { "epoch": 34.97, "learning_rate": 0.0001, "loss": 0.0142, "step": 280 }, { "epoch": 35.48, "learning_rate": 0.0001, "loss": 0.0147, "step": 284 }, { "epoch": 35.97, "learning_rate": 0.0001, "loss": 0.0141, "step": 288 }, { "epoch": 36.48, "learning_rate": 0.0001, "loss": 0.0119, "step": 292 }, { "epoch": 36.97, "learning_rate": 0.0001, "loss": 0.0136, "step": 296 }, { "epoch": 37.48, "learning_rate": 0.0001, "loss": 0.0152, "step": 300 }, { "epoch": 37.97, "learning_rate": 0.0001, "loss": 0.0111, "step": 304 }, { "epoch": 38.48, "learning_rate": 0.0001, "loss": 0.0132, "step": 308 }, { "epoch": 38.97, "learning_rate": 0.0001, "loss": 0.0121, "step": 312 }, { "epoch": 39.48, "learning_rate": 0.0001, "loss": 0.0125, "step": 316 }, { "epoch": 39.97, "learning_rate": 0.0001, "loss": 0.0125, "step": 320 }, { "epoch": 39.97, "eval_exact_match": 0.5551142005958292, "eval_exec": 0.5779543197616683, "eval_loss": 0.29541015625, "eval_runtime": 2642.6168, "eval_samples_per_second": 0.492, "step": 320 }, { "epoch": 40.48, "learning_rate": 0.0001, "loss": 0.0107, "step": 324 }, { "epoch": 40.97, "learning_rate": 0.0001, "loss": 0.0109, "step": 328 }, { "epoch": 41.48, "learning_rate": 0.0001, "loss": 0.0099, "step": 332 }, { "epoch": 41.97, "learning_rate": 0.0001, "loss": 0.0103, "step": 336 }, { "epoch": 42.48, "learning_rate": 0.0001, "loss": 0.0114, "step": 340 }, { "epoch": 42.97, "learning_rate": 0.0001, "loss": 0.0094, "step": 344 }, { "epoch": 43.48, "learning_rate": 0.0001, "loss": 0.0086, "step": 348 }, { "epoch": 43.97, "learning_rate": 0.0001, "loss": 0.009, "step": 352 }, { "epoch": 44.48, "learning_rate": 0.0001, "loss": 0.0108, "step": 356 }, { "epoch": 44.97, "learning_rate": 0.0001, "loss": 0.0104, "step": 360 }, { "epoch": 45.48, "learning_rate": 0.0001, "loss": 0.0086, "step": 364 }, { "epoch": 45.97, "learning_rate": 0.0001, "loss": 0.0094, "step": 368 }, { "epoch": 46.48, "learning_rate": 0.0001, "loss": 0.0088, "step": 372 }, { "epoch": 46.97, "learning_rate": 0.0001, "loss": 0.0091, "step": 376 }, { "epoch": 47.48, "learning_rate": 0.0001, "loss": 0.0071, "step": 380 }, { "epoch": 47.97, "learning_rate": 0.0001, "loss": 0.0075, "step": 384 }, { "epoch": 47.97, "eval_exact_match": 0.5680238331678252, "eval_exec": 0.5918570009930486, "eval_loss": 0.32373046875, "eval_runtime": 2755.8621, "eval_samples_per_second": 0.472, "step": 384 }, { "epoch": 48.48, "learning_rate": 0.0001, "loss": 0.0075, "step": 388 }, { "epoch": 48.97, "learning_rate": 0.0001, "loss": 0.0063, "step": 392 }, { "epoch": 49.48, "learning_rate": 0.0001, "loss": 0.0067, "step": 396 }, { "epoch": 49.97, "learning_rate": 0.0001, "loss": 0.0067, "step": 400 }, { "epoch": 50.48, "learning_rate": 0.0001, "loss": 0.007, "step": 404 }, { "epoch": 50.97, "learning_rate": 0.0001, "loss": 0.0057, "step": 408 }, { "epoch": 51.48, "learning_rate": 0.0001, "loss": 0.0074, "step": 412 }, { "epoch": 51.97, "learning_rate": 0.0001, "loss": 0.0064, "step": 416 }, { "epoch": 52.48, "learning_rate": 0.0001, "loss": 0.0059, "step": 420 }, { "epoch": 52.97, "learning_rate": 0.0001, "loss": 0.0067, "step": 424 }, { "epoch": 53.48, "learning_rate": 0.0001, "loss": 0.0057, "step": 428 }, { "epoch": 53.97, "learning_rate": 0.0001, "loss": 0.0056, "step": 432 }, { "epoch": 54.48, "learning_rate": 0.0001, "loss": 0.0058, "step": 436 }, { "epoch": 54.97, "learning_rate": 0.0001, "loss": 0.0057, "step": 440 }, { "epoch": 55.48, "learning_rate": 0.0001, "loss": 0.0055, "step": 444 }, { "epoch": 55.97, "learning_rate": 0.0001, "loss": 0.0053, "step": 448 }, { "epoch": 55.97, "eval_exact_match": 0.5729890764647467, "eval_exec": 0.5948361469712016, "eval_loss": 0.349853515625, "eval_runtime": 2795.6483, "eval_samples_per_second": 0.465, "step": 448 }, { "epoch": 56.48, "learning_rate": 0.0001, "loss": 0.0055, "step": 452 }, { "epoch": 56.97, "learning_rate": 0.0001, "loss": 0.0049, "step": 456 }, { "epoch": 57.48, "learning_rate": 0.0001, "loss": 0.0055, "step": 460 }, { "epoch": 57.97, "learning_rate": 0.0001, "loss": 0.0049, "step": 464 }, { "epoch": 58.48, "learning_rate": 0.0001, "loss": 0.0056, "step": 468 }, { "epoch": 58.97, "learning_rate": 0.0001, "loss": 0.0048, "step": 472 }, { "epoch": 59.48, "learning_rate": 0.0001, "loss": 0.0052, "step": 476 }, { "epoch": 59.97, "learning_rate": 0.0001, "loss": 0.0049, "step": 480 }, { "epoch": 60.48, "learning_rate": 0.0001, "loss": 0.0053, "step": 484 }, { "epoch": 60.97, "learning_rate": 0.0001, "loss": 0.0047, "step": 488 }, { "epoch": 61.48, "learning_rate": 0.0001, "loss": 0.0056, "step": 492 }, { "epoch": 61.97, "learning_rate": 0.0001, "loss": 0.0044, "step": 496 }, { "epoch": 62.48, "learning_rate": 0.0001, "loss": 0.0039, "step": 500 }, { "epoch": 62.97, "learning_rate": 0.0001, "loss": 0.0047, "step": 504 }, { "epoch": 63.48, "learning_rate": 0.0001, "loss": 0.0048, "step": 508 }, { "epoch": 63.97, "learning_rate": 0.0001, "loss": 0.0043, "step": 512 }, { "epoch": 63.97, "eval_exact_match": 0.5719960278053625, "eval_exec": 0.5918570009930486, "eval_loss": 0.3505859375, "eval_runtime": 2583.794, "eval_samples_per_second": 0.503, "step": 512 }, { "epoch": 64.48, "learning_rate": 0.0001, "loss": 0.0046, "step": 516 }, { "epoch": 64.97, "learning_rate": 0.0001, "loss": 0.0043, "step": 520 }, { "epoch": 65.48, "learning_rate": 0.0001, "loss": 0.0053, "step": 524 }, { "epoch": 65.97, "learning_rate": 0.0001, "loss": 0.0036, "step": 528 }, { "epoch": 66.48, "learning_rate": 0.0001, "loss": 0.0043, "step": 532 }, { "epoch": 66.97, "learning_rate": 0.0001, "loss": 0.0046, "step": 536 }, { "epoch": 67.48, "learning_rate": 0.0001, "loss": 0.0043, "step": 540 }, { "epoch": 67.97, "learning_rate": 0.0001, "loss": 0.0042, "step": 544 }, { "epoch": 68.48, "learning_rate": 0.0001, "loss": 0.0034, "step": 548 }, { "epoch": 68.97, "learning_rate": 0.0001, "loss": 0.0033, "step": 552 }, { "epoch": 69.48, "learning_rate": 0.0001, "loss": 0.005, "step": 556 }, { "epoch": 69.97, "learning_rate": 0.0001, "loss": 0.0032, "step": 560 }, { "epoch": 70.48, "learning_rate": 0.0001, "loss": 0.0032, "step": 564 }, { "epoch": 70.97, "learning_rate": 0.0001, "loss": 0.0041, "step": 568 }, { "epoch": 71.48, "learning_rate": 0.0001, "loss": 0.0034, "step": 572 }, { "epoch": 71.97, "learning_rate": 0.0001, "loss": 0.0044, "step": 576 }, { "epoch": 71.97, "eval_exact_match": 0.5680238331678252, "eval_exec": 0.5968222442899702, "eval_loss": 0.38623046875, "eval_runtime": 2715.8229, "eval_samples_per_second": 0.479, "step": 576 }, { "epoch": 72.48, "learning_rate": 0.0001, "loss": 0.0031, "step": 580 }, { "epoch": 72.97, "learning_rate": 0.0001, "loss": 0.0037, "step": 584 }, { "epoch": 73.48, "learning_rate": 0.0001, "loss": 0.0036, "step": 588 }, { "epoch": 73.97, "learning_rate": 0.0001, "loss": 0.0037, "step": 592 }, { "epoch": 74.48, "learning_rate": 0.0001, "loss": 0.0032, "step": 596 }, { "epoch": 74.97, "learning_rate": 0.0001, "loss": 0.0037, "step": 600 }, { "epoch": 75.48, "learning_rate": 0.0001, "loss": 0.0032, "step": 604 }, { "epoch": 75.97, "learning_rate": 0.0001, "loss": 0.0033, "step": 608 }, { "epoch": 76.48, "learning_rate": 0.0001, "loss": 0.0026, "step": 612 }, { "epoch": 76.97, "learning_rate": 0.0001, "loss": 0.0032, "step": 616 }, { "epoch": 77.48, "learning_rate": 0.0001, "loss": 0.003, "step": 620 }, { "epoch": 77.97, "learning_rate": 0.0001, "loss": 0.0024, "step": 624 }, { "epoch": 78.48, "learning_rate": 0.0001, "loss": 0.0028, "step": 628 }, { "epoch": 78.97, "learning_rate": 0.0001, "loss": 0.0031, "step": 632 }, { "epoch": 79.48, "learning_rate": 0.0001, "loss": 0.0028, "step": 636 }, { "epoch": 79.97, "learning_rate": 0.0001, "loss": 0.0035, "step": 640 }, { "epoch": 79.97, "eval_exact_match": 0.564051638530288, "eval_exec": 0.5878848063555114, "eval_loss": 0.39404296875, "eval_runtime": 2640.1046, "eval_samples_per_second": 0.492, "step": 640 }, { "epoch": 80.48, "learning_rate": 0.0001, "loss": 0.0027, "step": 644 }, { "epoch": 80.97, "learning_rate": 0.0001, "loss": 0.003, "step": 648 }, { "epoch": 81.48, "learning_rate": 0.0001, "loss": 0.003, "step": 652 }, { "epoch": 81.97, "learning_rate": 0.0001, "loss": 0.0022, "step": 656 }, { "epoch": 82.48, "learning_rate": 0.0001, "loss": 0.0031, "step": 660 }, { "epoch": 82.97, "learning_rate": 0.0001, "loss": 0.0031, "step": 664 }, { "epoch": 83.48, "learning_rate": 0.0001, "loss": 0.0031, "step": 668 }, { "epoch": 83.97, "learning_rate": 0.0001, "loss": 0.0025, "step": 672 }, { "epoch": 84.48, "learning_rate": 0.0001, "loss": 0.0034, "step": 676 }, { "epoch": 84.97, "learning_rate": 0.0001, "loss": 0.0026, "step": 680 }, { "epoch": 85.48, "learning_rate": 0.0001, "loss": 0.003, "step": 684 }, { "epoch": 85.97, "learning_rate": 0.0001, "loss": 0.0027, "step": 688 }, { "epoch": 86.48, "learning_rate": 0.0001, "loss": 0.0029, "step": 692 }, { "epoch": 86.97, "learning_rate": 0.0001, "loss": 0.003, "step": 696 }, { "epoch": 87.48, "learning_rate": 0.0001, "loss": 0.0031, "step": 700 }, { "epoch": 87.97, "learning_rate": 0.0001, "loss": 0.0022, "step": 704 }, { "epoch": 87.97, "eval_exact_match": 0.5700099304865939, "eval_exec": 0.5938430983118173, "eval_loss": 0.399169921875, "eval_runtime": 2706.8779, "eval_samples_per_second": 0.48, "step": 704 }, { "epoch": 88.48, "learning_rate": 0.0001, "loss": 0.0033, "step": 708 }, { "epoch": 88.97, "learning_rate": 0.0001, "loss": 0.0026, "step": 712 }, { "epoch": 89.48, "learning_rate": 0.0001, "loss": 0.0023, "step": 716 }, { "epoch": 89.97, "learning_rate": 0.0001, "loss": 0.0022, "step": 720 }, { "epoch": 90.48, "learning_rate": 0.0001, "loss": 0.0023, "step": 724 }, { "epoch": 90.97, "learning_rate": 0.0001, "loss": 0.0029, "step": 728 }, { "epoch": 91.48, "learning_rate": 0.0001, "loss": 0.0029, "step": 732 }, { "epoch": 91.97, "learning_rate": 0.0001, "loss": 0.0028, "step": 736 }, { "epoch": 92.48, "learning_rate": 0.0001, "loss": 0.0025, "step": 740 }, { "epoch": 92.97, "learning_rate": 0.0001, "loss": 0.0024, "step": 744 }, { "epoch": 93.48, "learning_rate": 0.0001, "loss": 0.0035, "step": 748 }, { "epoch": 93.97, "learning_rate": 0.0001, "loss": 0.0021, "step": 752 }, { "epoch": 94.48, "learning_rate": 0.0001, "loss": 0.0021, "step": 756 }, { "epoch": 94.97, "learning_rate": 0.0001, "loss": 0.0027, "step": 760 }, { "epoch": 95.48, "learning_rate": 0.0001, "loss": 0.002, "step": 764 }, { "epoch": 95.97, "learning_rate": 0.0001, "loss": 0.002, "step": 768 }, { "epoch": 95.97, "eval_exact_match": 0.5630585898709036, "eval_exec": 0.5898709036742801, "eval_loss": 0.38916015625, "eval_runtime": 2439.1984, "eval_samples_per_second": 0.533, "step": 768 }, { "epoch": 96.48, "learning_rate": 0.0001, "loss": 0.0024, "step": 772 }, { "epoch": 96.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 776 }, { "epoch": 97.48, "learning_rate": 0.0001, "loss": 0.0025, "step": 780 }, { "epoch": 97.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 784 }, { "epoch": 98.48, "learning_rate": 0.0001, "loss": 0.0024, "step": 788 }, { "epoch": 98.97, "learning_rate": 0.0001, "loss": 0.0026, "step": 792 }, { "epoch": 99.48, "learning_rate": 0.0001, "loss": 0.0029, "step": 796 }, { "epoch": 99.97, "learning_rate": 0.0001, "loss": 0.0028, "step": 800 }, { "epoch": 100.48, "learning_rate": 0.0001, "loss": 0.0021, "step": 804 }, { "epoch": 100.97, "learning_rate": 0.0001, "loss": 0.0027, "step": 808 }, { "epoch": 101.48, "learning_rate": 0.0001, "loss": 0.0029, "step": 812 }, { "epoch": 101.97, "learning_rate": 0.0001, "loss": 0.0021, "step": 816 }, { "epoch": 102.48, "learning_rate": 0.0001, "loss": 0.0024, "step": 820 }, { "epoch": 102.97, "learning_rate": 0.0001, "loss": 0.0033, "step": 824 }, { "epoch": 103.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 828 }, { "epoch": 103.97, "learning_rate": 0.0001, "loss": 0.0024, "step": 832 }, { "epoch": 103.97, "eval_exact_match": 0.5620655412115194, "eval_exec": 0.6017874875868917, "eval_loss": 0.4052734375, "eval_runtime": 2462.89, "eval_samples_per_second": 0.528, "step": 832 }, { "epoch": 104.48, "learning_rate": 0.0001, "loss": 0.0026, "step": 836 }, { "epoch": 104.97, "learning_rate": 0.0001, "loss": 0.0023, "step": 840 }, { "epoch": 105.48, "learning_rate": 0.0001, "loss": 0.0018, "step": 844 }, { "epoch": 105.97, "learning_rate": 0.0001, "loss": 0.0021, "step": 848 }, { "epoch": 106.48, "learning_rate": 0.0001, "loss": 0.0021, "step": 852 }, { "epoch": 106.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 856 }, { "epoch": 107.48, "learning_rate": 0.0001, "loss": 0.0021, "step": 860 }, { "epoch": 107.97, "learning_rate": 0.0001, "loss": 0.002, "step": 864 }, { "epoch": 108.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 868 }, { "epoch": 108.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 872 }, { "epoch": 109.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 876 }, { "epoch": 109.97, "learning_rate": 0.0001, "loss": 0.0023, "step": 880 }, { "epoch": 110.48, "learning_rate": 0.0001, "loss": 0.0025, "step": 884 }, { "epoch": 110.97, "learning_rate": 0.0001, "loss": 0.0023, "step": 888 }, { "epoch": 111.48, "learning_rate": 0.0001, "loss": 0.0019, "step": 892 }, { "epoch": 111.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 896 }, { "epoch": 111.97, "eval_exact_match": 0.5690168818272096, "eval_exec": 0.5968222442899702, "eval_loss": 0.406005859375, "eval_runtime": 2444.0389, "eval_samples_per_second": 0.532, "step": 896 }, { "epoch": 112.48, "learning_rate": 0.0001, "loss": 0.0018, "step": 900 }, { "epoch": 112.97, "learning_rate": 0.0001, "loss": 0.0021, "step": 904 }, { "epoch": 113.48, "learning_rate": 0.0001, "loss": 0.0025, "step": 908 }, { "epoch": 113.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 912 }, { "epoch": 114.48, "learning_rate": 0.0001, "loss": 0.0021, "step": 916 }, { "epoch": 114.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 920 }, { "epoch": 115.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 924 }, { "epoch": 115.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 928 }, { "epoch": 116.48, "learning_rate": 0.0001, "loss": 0.002, "step": 932 }, { "epoch": 116.97, "learning_rate": 0.0001, "loss": 0.0023, "step": 936 }, { "epoch": 117.48, "learning_rate": 0.0001, "loss": 0.002, "step": 940 }, { "epoch": 117.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 944 }, { "epoch": 118.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 948 }, { "epoch": 118.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 952 }, { "epoch": 119.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 956 }, { "epoch": 119.97, "learning_rate": 0.0001, "loss": 0.002, "step": 960 }, { "epoch": 119.97, "eval_exact_match": 0.5680238331678252, "eval_exec": 0.5918570009930486, "eval_loss": 0.418701171875, "eval_runtime": 2472.2191, "eval_samples_per_second": 0.526, "step": 960 }, { "epoch": 120.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 964 }, { "epoch": 120.97, "learning_rate": 0.0001, "loss": 0.0017, "step": 968 }, { "epoch": 121.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 972 }, { "epoch": 121.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 976 }, { "epoch": 122.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 980 }, { "epoch": 122.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 984 }, { "epoch": 123.48, "learning_rate": 0.0001, "loss": 0.0016, "step": 988 }, { "epoch": 123.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 992 }, { "epoch": 124.48, "learning_rate": 0.0001, "loss": 0.001, "step": 996 }, { "epoch": 124.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1000 }, { "epoch": 125.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1004 }, { "epoch": 125.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 1008 }, { "epoch": 126.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1012 }, { "epoch": 126.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1016 }, { "epoch": 127.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 1020 }, { "epoch": 127.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1024 }, { "epoch": 127.97, "eval_exact_match": 0.5680238331678252, "eval_exec": 0.5938430983118173, "eval_loss": 0.42626953125, "eval_runtime": 2538.4241, "eval_samples_per_second": 0.512, "step": 1024 }, { "epoch": 128.48, "learning_rate": 0.0001, "loss": 0.0016, "step": 1028 }, { "epoch": 128.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1032 }, { "epoch": 129.48, "learning_rate": 0.0001, "loss": 0.0019, "step": 1036 }, { "epoch": 129.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 1040 }, { "epoch": 130.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1044 }, { "epoch": 130.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1048 }, { "epoch": 131.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1052 }, { "epoch": 131.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1056 }, { "epoch": 132.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1060 }, { "epoch": 132.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 1064 }, { "epoch": 133.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1068 }, { "epoch": 133.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1072 }, { "epoch": 134.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1076 }, { "epoch": 134.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1080 }, { "epoch": 135.48, "learning_rate": 0.0001, "loss": 0.0018, "step": 1084 }, { "epoch": 135.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1088 }, { "epoch": 135.97, "eval_exact_match": 0.5620655412115194, "eval_exec": 0.5908639523336644, "eval_loss": 0.4482421875, "eval_runtime": 2432.8996, "eval_samples_per_second": 0.534, "step": 1088 }, { "epoch": 136.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1092 }, { "epoch": 136.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 1096 }, { "epoch": 137.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1100 }, { "epoch": 137.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 1104 }, { "epoch": 138.48, "learning_rate": 0.0001, "loss": 0.0016, "step": 1108 }, { "epoch": 138.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1112 }, { "epoch": 139.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1116 }, { "epoch": 139.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1120 }, { "epoch": 140.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1124 }, { "epoch": 140.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 1128 }, { "epoch": 141.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1132 }, { "epoch": 141.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 1136 }, { "epoch": 142.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 1140 }, { "epoch": 142.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 1144 }, { "epoch": 143.48, "learning_rate": 0.0001, "loss": 0.0019, "step": 1148 }, { "epoch": 143.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1152 }, { "epoch": 143.97, "eval_exact_match": 0.5580933465739821, "eval_exec": 0.5888778550148958, "eval_loss": 0.448486328125, "eval_runtime": 2855.924, "eval_samples_per_second": 0.455, "step": 1152 }, { "epoch": 144.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1156 }, { "epoch": 144.97, "learning_rate": 0.0001, "loss": 0.0017, "step": 1160 }, { "epoch": 145.48, "learning_rate": 0.0001, "loss": 0.0015, "step": 1164 }, { "epoch": 145.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1168 }, { "epoch": 146.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1172 }, { "epoch": 146.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1176 }, { "epoch": 147.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1180 }, { "epoch": 147.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1184 }, { "epoch": 148.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1188 }, { "epoch": 148.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1192 }, { "epoch": 149.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1196 }, { "epoch": 149.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1200 }, { "epoch": 150.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1204 }, { "epoch": 150.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1208 }, { "epoch": 151.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1212 }, { "epoch": 151.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1216 }, { "epoch": 151.97, "eval_exact_match": 0.5571002979145978, "eval_exec": 0.5928500496524329, "eval_loss": 0.458984375, "eval_runtime": 2831.5441, "eval_samples_per_second": 0.459, "step": 1216 }, { "epoch": 152.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 1220 }, { "epoch": 152.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1224 }, { "epoch": 153.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1228 }, { "epoch": 153.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1232 }, { "epoch": 154.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1236 }, { "epoch": 154.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 1240 }, { "epoch": 155.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1244 }, { "epoch": 155.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1248 }, { "epoch": 156.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1252 }, { "epoch": 156.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1256 }, { "epoch": 157.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1260 }, { "epoch": 157.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1264 }, { "epoch": 158.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1268 }, { "epoch": 158.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1272 }, { "epoch": 159.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1276 }, { "epoch": 159.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1280 }, { "epoch": 159.97, "eval_exact_match": 0.5561072492552135, "eval_exec": 0.5888778550148958, "eval_loss": 0.45849609375, "eval_runtime": 2610.2922, "eval_samples_per_second": 0.498, "step": 1280 }, { "epoch": 160.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1284 }, { "epoch": 160.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1288 }, { "epoch": 161.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 1292 }, { "epoch": 161.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1296 }, { "epoch": 162.48, "learning_rate": 0.0001, "loss": 0.0015, "step": 1300 }, { "epoch": 162.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1304 }, { "epoch": 163.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1308 }, { "epoch": 163.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1312 }, { "epoch": 164.48, "learning_rate": 0.0001, "loss": 0.0028, "step": 1316 }, { "epoch": 164.97, "learning_rate": 0.0001, "loss": 0.0027, "step": 1320 }, { "epoch": 165.48, "learning_rate": 0.0001, "loss": 0.0022, "step": 1324 }, { "epoch": 165.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1328 }, { "epoch": 166.48, "learning_rate": 0.0001, "loss": 0.0016, "step": 1332 }, { "epoch": 166.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1336 }, { "epoch": 167.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 1340 }, { "epoch": 167.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1344 }, { "epoch": 167.97, "eval_exact_match": 0.5630585898709036, "eval_exec": 0.6037735849056604, "eval_loss": 0.431396484375, "eval_runtime": 2582.959, "eval_samples_per_second": 0.503, "step": 1344 }, { "epoch": 168.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1348 }, { "epoch": 168.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1352 }, { "epoch": 169.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1356 }, { "epoch": 169.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1360 }, { "epoch": 170.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1364 }, { "epoch": 170.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1368 }, { "epoch": 171.48, "learning_rate": 0.0001, "loss": 0.0006, "step": 1372 }, { "epoch": 171.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1376 }, { "epoch": 172.48, "learning_rate": 0.0001, "loss": 0.0007, "step": 1380 }, { "epoch": 172.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1384 }, { "epoch": 173.48, "learning_rate": 0.0001, "loss": 0.0014, "step": 1388 }, { "epoch": 173.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1392 }, { "epoch": 174.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1396 }, { "epoch": 174.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1400 }, { "epoch": 175.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 1404 }, { "epoch": 175.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1408 }, { "epoch": 175.97, "eval_exact_match": 0.5610724925521351, "eval_exec": 0.5978152929493545, "eval_loss": 0.4541015625, "eval_runtime": 2572.262, "eval_samples_per_second": 0.505, "step": 1408 }, { "epoch": 176.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 1412 }, { "epoch": 176.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1416 }, { "epoch": 177.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1420 }, { "epoch": 177.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1424 }, { "epoch": 178.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1428 }, { "epoch": 178.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1432 }, { "epoch": 179.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1436 }, { "epoch": 179.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 1440 }, { "epoch": 180.48, "learning_rate": 0.0001, "loss": 0.0007, "step": 1444 }, { "epoch": 180.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1448 }, { "epoch": 181.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1452 }, { "epoch": 181.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1456 }, { "epoch": 182.48, "learning_rate": 0.0001, "loss": 0.0007, "step": 1460 }, { "epoch": 182.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1464 }, { "epoch": 183.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1468 }, { "epoch": 183.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1472 }, { "epoch": 183.97, "eval_exact_match": 0.564051638530288, "eval_exec": 0.5958291956305859, "eval_loss": 0.468505859375, "eval_runtime": 2664.1072, "eval_samples_per_second": 0.488, "step": 1472 }, { "epoch": 184.48, "learning_rate": 0.0001, "loss": 0.0013, "step": 1476 }, { "epoch": 184.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1480 }, { "epoch": 185.48, "learning_rate": 0.0001, "loss": 0.0009, "step": 1484 }, { "epoch": 185.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1488 }, { "epoch": 186.48, "learning_rate": 0.0001, "loss": 0.0005, "step": 1492 }, { "epoch": 186.97, "learning_rate": 0.0001, "loss": 0.0016, "step": 1496 }, { "epoch": 187.48, "learning_rate": 0.0001, "loss": 0.0011, "step": 1500 }, { "epoch": 187.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 1504 }, { "epoch": 188.48, "learning_rate": 0.0001, "loss": 0.0017, "step": 1508 }, { "epoch": 188.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1512 }, { "epoch": 189.48, "learning_rate": 0.0001, "loss": 0.0012, "step": 1516 }, { "epoch": 189.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1520 }, { "epoch": 190.48, "learning_rate": 0.0001, "loss": 0.001, "step": 1524 }, { "epoch": 190.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1528 }, { "epoch": 191.48, "learning_rate": 0.0001, "loss": 0.0008, "step": 1532 }, { "epoch": 191.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1536 }, { "epoch": 191.97, "eval_exact_match": 0.5710029791459782, "eval_exec": 0.5998013902681232, "eval_loss": 0.466552734375, "eval_runtime": 2439.0394, "eval_samples_per_second": 0.533, "step": 1536 } ], "max_steps": 24576, "num_train_epochs": 3072, "total_flos": 6277475729408.0, "trial_name": null, "trial_params": null }