{ "best_metric": 0.5253227408142999, "best_model_checkpoint": "/experiments/cosql/1ccc8b19-d4cc-42ca-b70f-7d796303d2e5/checkpoint-1856", "epoch": 231.9922480620155, "global_step": 1856, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.0001, "loss": 3.2653, "step": 1 }, { "epoch": 0.5, "learning_rate": 0.0001, "loss": 2.2224, "step": 4 }, { "epoch": 0.99, "learning_rate": 0.0001, "loss": 1.1652, "step": 8 }, { "epoch": 1.5, "learning_rate": 0.0001, "loss": 0.8035, "step": 12 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 0.642, "step": 16 }, { "epoch": 2.5, "learning_rate": 0.0001, "loss": 0.537, "step": 20 }, { "epoch": 2.99, "learning_rate": 0.0001, "loss": 0.4739, "step": 24 }, { "epoch": 3.5, "learning_rate": 0.0001, "loss": 0.4251, "step": 28 }, { "epoch": 3.99, "learning_rate": 0.0001, "loss": 0.3945, "step": 32 }, { "epoch": 4.5, "learning_rate": 0.0001, "loss": 0.352, "step": 36 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 0.3245, "step": 40 }, { "epoch": 5.5, "learning_rate": 0.0001, "loss": 0.2948, "step": 44 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 0.287, "step": 48 }, { "epoch": 6.5, "learning_rate": 0.0001, "loss": 0.2686, "step": 52 }, { "epoch": 6.99, "learning_rate": 0.0001, "loss": 0.2745, "step": 56 }, { "epoch": 7.5, "learning_rate": 0.0001, "loss": 0.2495, "step": 60 }, { "epoch": 7.99, "learning_rate": 0.0001, "loss": 0.2298, "step": 64 }, { "epoch": 7.99, "eval_exact_match": 0.3644488579940417, "eval_exec": 0.3902681231380338, "eval_loss": 0.23653237521648407, "eval_runtime": 724.5845, "eval_samples_per_second": 1.794, "step": 64 }, { "epoch": 8.5, "learning_rate": 0.0001, "loss": 0.2242, "step": 68 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 0.205, "step": 72 }, { "epoch": 9.5, "learning_rate": 0.0001, "loss": 0.1932, "step": 76 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 0.1937, "step": 80 }, { "epoch": 10.5, "learning_rate": 0.0001, "loss": 0.1978, "step": 84 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 0.1747, "step": 88 }, { "epoch": 11.5, "learning_rate": 0.0001, "loss": 0.1669, "step": 92 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 0.1734, "step": 96 }, { "epoch": 12.5, "learning_rate": 0.0001, "loss": 0.1615, "step": 100 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 0.1551, "step": 104 }, { "epoch": 13.5, "learning_rate": 0.0001, "loss": 0.1519, "step": 108 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 0.1476, "step": 112 }, { "epoch": 14.5, "learning_rate": 0.0001, "loss": 0.1483, "step": 116 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 0.1478, "step": 120 }, { "epoch": 15.5, "learning_rate": 0.0001, "loss": 0.1292, "step": 124 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 0.1285, "step": 128 }, { "epoch": 15.99, "eval_exact_match": 0.4170804369414101, "eval_exec": 0.45878848063555117, "eval_loss": 0.20163151621818542, "eval_runtime": 846.5253, "eval_samples_per_second": 1.536, "step": 128 }, { "epoch": 16.5, "learning_rate": 0.0001, "loss": 0.1407, "step": 132 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 0.1228, "step": 136 }, { "epoch": 17.5, "learning_rate": 0.0001, "loss": 0.1324, "step": 140 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 0.1214, "step": 144 }, { "epoch": 18.5, "learning_rate": 0.0001, "loss": 0.1224, "step": 148 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 0.1128, "step": 152 }, { "epoch": 19.5, "learning_rate": 0.0001, "loss": 0.1016, "step": 156 }, { "epoch": 19.99, "learning_rate": 0.0001, "loss": 0.107, "step": 160 }, { "epoch": 20.5, "learning_rate": 0.0001, "loss": 0.1116, "step": 164 }, { "epoch": 20.99, "learning_rate": 0.0001, "loss": 0.0997, "step": 168 }, { "epoch": 21.5, "learning_rate": 0.0001, "loss": 0.1025, "step": 172 }, { "epoch": 21.99, "learning_rate": 0.0001, "loss": 0.0973, "step": 176 }, { "epoch": 22.5, "learning_rate": 0.0001, "loss": 0.0996, "step": 180 }, { "epoch": 22.99, "learning_rate": 0.0001, "loss": 0.0917, "step": 184 }, { "epoch": 23.5, "learning_rate": 0.0001, "loss": 0.0899, "step": 188 }, { "epoch": 23.99, "learning_rate": 0.0001, "loss": 0.0896, "step": 192 }, { "epoch": 23.99, "eval_exact_match": 0.4816285998013903, "eval_exec": 0.5104270109235353, "eval_loss": 0.19154271483421326, "eval_runtime": 788.1761, "eval_samples_per_second": 1.649, "step": 192 }, { "epoch": 24.5, "learning_rate": 0.0001, "loss": 0.0909, "step": 196 }, { "epoch": 24.99, "learning_rate": 0.0001, "loss": 0.0867, "step": 200 }, { "epoch": 25.5, "learning_rate": 0.0001, "loss": 0.089, "step": 204 }, { "epoch": 25.99, "learning_rate": 0.0001, "loss": 0.0888, "step": 208 }, { "epoch": 26.5, "learning_rate": 0.0001, "loss": 0.0946, "step": 212 }, { "epoch": 26.99, "learning_rate": 0.0001, "loss": 0.0803, "step": 216 }, { "epoch": 27.5, "learning_rate": 0.0001, "loss": 0.0769, "step": 220 }, { "epoch": 27.99, "learning_rate": 0.0001, "loss": 0.078, "step": 224 }, { "epoch": 28.5, "learning_rate": 0.0001, "loss": 0.084, "step": 228 }, { "epoch": 28.99, "learning_rate": 0.0001, "loss": 0.07, "step": 232 }, { "epoch": 29.5, "learning_rate": 0.0001, "loss": 0.0727, "step": 236 }, { "epoch": 29.99, "learning_rate": 0.0001, "loss": 0.0719, "step": 240 }, { "epoch": 30.5, "learning_rate": 0.0001, "loss": 0.0727, "step": 244 }, { "epoch": 30.99, "learning_rate": 0.0001, "loss": 0.0759, "step": 248 }, { "epoch": 31.5, "learning_rate": 0.0001, "loss": 0.0708, "step": 252 }, { "epoch": 31.99, "learning_rate": 0.0001, "loss": 0.0674, "step": 256 }, { "epoch": 31.99, "eval_exact_match": 0.5014895729890765, "eval_exec": 0.5382323733862959, "eval_loss": 0.19477179646492004, "eval_runtime": 786.6121, "eval_samples_per_second": 1.653, "step": 256 }, { "epoch": 32.5, "learning_rate": 0.0001, "loss": 0.0673, "step": 260 }, { "epoch": 32.99, "learning_rate": 0.0001, "loss": 0.0716, "step": 264 }, { "epoch": 33.5, "learning_rate": 0.0001, "loss": 0.0658, "step": 268 }, { "epoch": 33.99, "learning_rate": 0.0001, "loss": 0.0564, "step": 272 }, { "epoch": 34.5, "learning_rate": 0.0001, "loss": 0.0604, "step": 276 }, { "epoch": 34.99, "learning_rate": 0.0001, "loss": 0.0632, "step": 280 }, { "epoch": 35.5, "learning_rate": 0.0001, "loss": 0.0611, "step": 284 }, { "epoch": 35.99, "learning_rate": 0.0001, "loss": 0.0577, "step": 288 }, { "epoch": 36.5, "learning_rate": 0.0001, "loss": 0.0602, "step": 292 }, { "epoch": 36.99, "learning_rate": 0.0001, "loss": 0.0556, "step": 296 }, { "epoch": 37.5, "learning_rate": 0.0001, "loss": 0.0588, "step": 300 }, { "epoch": 37.99, "learning_rate": 0.0001, "loss": 0.0584, "step": 304 }, { "epoch": 38.5, "learning_rate": 0.0001, "loss": 0.0537, "step": 308 }, { "epoch": 38.99, "learning_rate": 0.0001, "loss": 0.0538, "step": 312 }, { "epoch": 39.5, "learning_rate": 0.0001, "loss": 0.0517, "step": 316 }, { "epoch": 39.99, "learning_rate": 0.0001, "loss": 0.0542, "step": 320 }, { "epoch": 39.99, "eval_exact_match": 0.5173783515392254, "eval_exec": 0.5501489572989077, "eval_loss": 0.21476374566555023, "eval_runtime": 722.7734, "eval_samples_per_second": 1.799, "step": 320 }, { "epoch": 40.5, "learning_rate": 0.0001, "loss": 0.052, "step": 324 }, { "epoch": 40.99, "learning_rate": 0.0001, "loss": 0.0552, "step": 328 }, { "epoch": 41.5, "learning_rate": 0.0001, "loss": 0.0474, "step": 332 }, { "epoch": 41.99, "learning_rate": 0.0001, "loss": 0.0456, "step": 336 }, { "epoch": 42.5, "learning_rate": 0.0001, "loss": 0.0517, "step": 340 }, { "epoch": 42.99, "learning_rate": 0.0001, "loss": 0.0484, "step": 344 }, { "epoch": 43.5, "learning_rate": 0.0001, "loss": 0.0432, "step": 348 }, { "epoch": 43.99, "learning_rate": 0.0001, "loss": 0.0446, "step": 352 }, { "epoch": 44.5, "learning_rate": 0.0001, "loss": 0.0443, "step": 356 }, { "epoch": 44.99, "learning_rate": 0.0001, "loss": 0.0425, "step": 360 }, { "epoch": 45.5, "learning_rate": 0.0001, "loss": 0.0452, "step": 364 }, { "epoch": 45.99, "learning_rate": 0.0001, "loss": 0.0429, "step": 368 }, { "epoch": 46.5, "learning_rate": 0.0001, "loss": 0.0471, "step": 372 }, { "epoch": 46.99, "learning_rate": 0.0001, "loss": 0.0433, "step": 376 }, { "epoch": 47.5, "learning_rate": 0.0001, "loss": 0.0416, "step": 380 }, { "epoch": 47.99, "learning_rate": 0.0001, "loss": 0.0397, "step": 384 }, { "epoch": 47.99, "eval_exact_match": 0.5143992055610725, "eval_exec": 0.5491559086395233, "eval_loss": 0.22859126329421997, "eval_runtime": 751.9885, "eval_samples_per_second": 1.729, "step": 384 }, { "epoch": 48.5, "learning_rate": 0.0001, "loss": 0.0449, "step": 388 }, { "epoch": 48.99, "learning_rate": 0.0001, "loss": 0.0404, "step": 392 }, { "epoch": 49.5, "learning_rate": 0.0001, "loss": 0.0384, "step": 396 }, { "epoch": 49.99, "learning_rate": 0.0001, "loss": 0.0378, "step": 400 }, { "epoch": 50.5, "learning_rate": 0.0001, "loss": 0.0405, "step": 404 }, { "epoch": 50.99, "learning_rate": 0.0001, "loss": 0.0391, "step": 408 }, { "epoch": 51.5, "learning_rate": 0.0001, "loss": 0.0377, "step": 412 }, { "epoch": 51.99, "learning_rate": 0.0001, "loss": 0.0349, "step": 416 }, { "epoch": 52.5, "learning_rate": 0.0001, "loss": 0.0328, "step": 420 }, { "epoch": 52.99, "learning_rate": 0.0001, "loss": 0.0363, "step": 424 }, { "epoch": 53.5, "learning_rate": 0.0001, "loss": 0.0355, "step": 428 }, { "epoch": 53.99, "learning_rate": 0.0001, "loss": 0.0317, "step": 432 }, { "epoch": 54.5, "learning_rate": 0.0001, "loss": 0.0308, "step": 436 }, { "epoch": 54.99, "learning_rate": 0.0001, "loss": 0.0303, "step": 440 }, { "epoch": 55.5, "learning_rate": 0.0001, "loss": 0.0313, "step": 444 }, { "epoch": 55.99, "learning_rate": 0.0001, "loss": 0.0351, "step": 448 }, { "epoch": 55.99, "eval_exact_match": 0.5243296921549155, "eval_exec": 0.5561072492552135, "eval_loss": 0.2355286180973053, "eval_runtime": 746.4078, "eval_samples_per_second": 1.742, "step": 448 }, { "epoch": 56.5, "learning_rate": 0.0001, "loss": 0.0307, "step": 452 }, { "epoch": 56.99, "learning_rate": 0.0001, "loss": 0.0296, "step": 456 }, { "epoch": 57.5, "learning_rate": 0.0001, "loss": 0.0295, "step": 460 }, { "epoch": 57.99, "learning_rate": 0.0001, "loss": 0.0316, "step": 464 }, { "epoch": 58.5, "learning_rate": 0.0001, "loss": 0.0283, "step": 468 }, { "epoch": 58.99, "learning_rate": 0.0001, "loss": 0.0321, "step": 472 }, { "epoch": 59.5, "learning_rate": 0.0001, "loss": 0.0296, "step": 476 }, { "epoch": 59.99, "learning_rate": 0.0001, "loss": 0.0262, "step": 480 }, { "epoch": 60.5, "learning_rate": 0.0001, "loss": 0.0259, "step": 484 }, { "epoch": 60.99, "learning_rate": 0.0001, "loss": 0.027, "step": 488 }, { "epoch": 61.5, "learning_rate": 0.0001, "loss": 0.0263, "step": 492 }, { "epoch": 61.99, "learning_rate": 0.0001, "loss": 0.0259, "step": 496 }, { "epoch": 62.5, "learning_rate": 0.0001, "loss": 0.0275, "step": 500 }, { "epoch": 62.99, "learning_rate": 0.0001, "loss": 0.0262, "step": 504 }, { "epoch": 63.5, "learning_rate": 0.0001, "loss": 0.0252, "step": 508 }, { "epoch": 63.99, "learning_rate": 0.0001, "loss": 0.025, "step": 512 }, { "epoch": 63.99, "eval_exact_match": 0.5094339622641509, "eval_exec": 0.5422045680238332, "eval_loss": 0.2562254071235657, "eval_runtime": 720.3713, "eval_samples_per_second": 1.805, "step": 512 }, { "epoch": 64.5, "learning_rate": 0.0001, "loss": 0.0256, "step": 516 }, { "epoch": 64.99, "learning_rate": 0.0001, "loss": 0.0272, "step": 520 }, { "epoch": 65.5, "learning_rate": 0.0001, "loss": 0.0293, "step": 524 }, { "epoch": 65.99, "learning_rate": 0.0001, "loss": 0.0231, "step": 528 }, { "epoch": 66.5, "learning_rate": 0.0001, "loss": 0.0232, "step": 532 }, { "epoch": 66.99, "learning_rate": 0.0001, "loss": 0.0253, "step": 536 }, { "epoch": 67.5, "learning_rate": 0.0001, "loss": 0.0224, "step": 540 }, { "epoch": 67.99, "learning_rate": 0.0001, "loss": 0.021, "step": 544 }, { "epoch": 68.5, "learning_rate": 0.0001, "loss": 0.0206, "step": 548 }, { "epoch": 68.99, "learning_rate": 0.0001, "loss": 0.0229, "step": 552 }, { "epoch": 69.5, "learning_rate": 0.0001, "loss": 0.0243, "step": 556 }, { "epoch": 69.99, "learning_rate": 0.0001, "loss": 0.0211, "step": 560 }, { "epoch": 70.5, "learning_rate": 0.0001, "loss": 0.0208, "step": 564 }, { "epoch": 70.99, "learning_rate": 0.0001, "loss": 0.0225, "step": 568 }, { "epoch": 71.5, "learning_rate": 0.0001, "loss": 0.0209, "step": 572 }, { "epoch": 71.99, "learning_rate": 0.0001, "loss": 0.0225, "step": 576 }, { "epoch": 71.99, "eval_exact_match": 0.5173783515392254, "eval_exec": 0.5461767626613704, "eval_loss": 0.2805687189102173, "eval_runtime": 749.8642, "eval_samples_per_second": 1.734, "step": 576 }, { "epoch": 72.5, "learning_rate": 0.0001, "loss": 0.0188, "step": 580 }, { "epoch": 72.99, "learning_rate": 0.0001, "loss": 0.0209, "step": 584 }, { "epoch": 73.5, "learning_rate": 0.0001, "loss": 0.0193, "step": 588 }, { "epoch": 73.99, "learning_rate": 0.0001, "loss": 0.0221, "step": 592 }, { "epoch": 74.5, "learning_rate": 0.0001, "loss": 0.0193, "step": 596 }, { "epoch": 74.99, "learning_rate": 0.0001, "loss": 0.0184, "step": 600 }, { "epoch": 75.5, "learning_rate": 0.0001, "loss": 0.0169, "step": 604 }, { "epoch": 75.99, "learning_rate": 0.0001, "loss": 0.0177, "step": 608 }, { "epoch": 76.5, "learning_rate": 0.0001, "loss": 0.0189, "step": 612 }, { "epoch": 76.99, "learning_rate": 0.0001, "loss": 0.0185, "step": 616 }, { "epoch": 77.5, "learning_rate": 0.0001, "loss": 0.0166, "step": 620 }, { "epoch": 77.99, "learning_rate": 0.0001, "loss": 0.0167, "step": 624 }, { "epoch": 78.5, "learning_rate": 0.0001, "loss": 0.0145, "step": 628 }, { "epoch": 78.99, "learning_rate": 0.0001, "loss": 0.0162, "step": 632 }, { "epoch": 79.5, "learning_rate": 0.0001, "loss": 0.0168, "step": 636 }, { "epoch": 79.99, "learning_rate": 0.0001, "loss": 0.0172, "step": 640 }, { "epoch": 79.99, "eval_exact_match": 0.5134061569016882, "eval_exec": 0.5431976166832175, "eval_loss": 0.2822588086128235, "eval_runtime": 735.1307, "eval_samples_per_second": 1.768, "step": 640 }, { "epoch": 80.5, "learning_rate": 0.0001, "loss": 0.017, "step": 644 }, { "epoch": 80.99, "learning_rate": 0.0001, "loss": 0.0166, "step": 648 }, { "epoch": 81.5, "learning_rate": 0.0001, "loss": 0.0157, "step": 652 }, { "epoch": 81.99, "learning_rate": 0.0001, "loss": 0.0182, "step": 656 }, { "epoch": 82.5, "learning_rate": 0.0001, "loss": 0.0168, "step": 660 }, { "epoch": 82.99, "learning_rate": 0.0001, "loss": 0.0145, "step": 664 }, { "epoch": 83.5, "learning_rate": 0.0001, "loss": 0.0142, "step": 668 }, { "epoch": 83.99, "learning_rate": 0.0001, "loss": 0.0139, "step": 672 }, { "epoch": 84.5, "learning_rate": 0.0001, "loss": 0.0151, "step": 676 }, { "epoch": 84.99, "learning_rate": 0.0001, "loss": 0.0137, "step": 680 }, { "epoch": 85.5, "learning_rate": 0.0001, "loss": 0.0137, "step": 684 }, { "epoch": 85.99, "learning_rate": 0.0001, "loss": 0.0119, "step": 688 }, { "epoch": 86.5, "learning_rate": 0.0001, "loss": 0.0128, "step": 692 }, { "epoch": 86.99, "learning_rate": 0.0001, "loss": 0.0134, "step": 696 }, { "epoch": 87.5, "learning_rate": 0.0001, "loss": 0.0143, "step": 700 }, { "epoch": 87.99, "learning_rate": 0.0001, "loss": 0.0189, "step": 704 }, { "epoch": 87.99, "eval_exact_match": 0.5134061569016882, "eval_exec": 0.5441906653426017, "eval_loss": 0.285916805267334, "eval_runtime": 740.5163, "eval_samples_per_second": 1.756, "step": 704 }, { "epoch": 88.5, "learning_rate": 0.0001, "loss": 0.0125, "step": 708 }, { "epoch": 88.99, "learning_rate": 0.0001, "loss": 0.0122, "step": 712 }, { "epoch": 89.5, "learning_rate": 0.0001, "loss": 0.0141, "step": 716 }, { "epoch": 89.99, "learning_rate": 0.0001, "loss": 0.0132, "step": 720 }, { "epoch": 90.5, "learning_rate": 0.0001, "loss": 0.0152, "step": 724 }, { "epoch": 90.99, "learning_rate": 0.0001, "loss": 0.0118, "step": 728 }, { "epoch": 91.5, "learning_rate": 0.0001, "loss": 0.0105, "step": 732 }, { "epoch": 91.99, "learning_rate": 0.0001, "loss": 0.0117, "step": 736 }, { "epoch": 92.5, "learning_rate": 0.0001, "loss": 0.0119, "step": 740 }, { "epoch": 92.99, "learning_rate": 0.0001, "loss": 0.0126, "step": 744 }, { "epoch": 93.5, "learning_rate": 0.0001, "loss": 0.0118, "step": 748 }, { "epoch": 93.99, "learning_rate": 0.0001, "loss": 0.0143, "step": 752 }, { "epoch": 94.5, "learning_rate": 0.0001, "loss": 0.0127, "step": 756 }, { "epoch": 94.99, "learning_rate": 0.0001, "loss": 0.0143, "step": 760 }, { "epoch": 95.5, "learning_rate": 0.0001, "loss": 0.0107, "step": 764 }, { "epoch": 95.99, "learning_rate": 0.0001, "loss": 0.0112, "step": 768 }, { "epoch": 95.99, "eval_exact_match": 0.506454816285998, "eval_exec": 0.5461767626613704, "eval_loss": 0.3350318670272827, "eval_runtime": 791.2409, "eval_samples_per_second": 1.643, "step": 768 }, { "epoch": 96.5, "learning_rate": 0.0001, "loss": 0.0113, "step": 772 }, { "epoch": 96.99, "learning_rate": 0.0001, "loss": 0.0107, "step": 776 }, { "epoch": 97.5, "learning_rate": 0.0001, "loss": 0.0112, "step": 780 }, { "epoch": 97.99, "learning_rate": 0.0001, "loss": 0.011, "step": 784 }, { "epoch": 98.5, "learning_rate": 0.0001, "loss": 0.012, "step": 788 }, { "epoch": 98.99, "learning_rate": 0.0001, "loss": 0.0122, "step": 792 }, { "epoch": 99.5, "learning_rate": 0.0001, "loss": 0.0113, "step": 796 }, { "epoch": 99.99, "learning_rate": 0.0001, "loss": 0.0101, "step": 800 }, { "epoch": 100.5, "learning_rate": 0.0001, "loss": 0.0089, "step": 804 }, { "epoch": 100.99, "learning_rate": 0.0001, "loss": 0.011, "step": 808 }, { "epoch": 101.5, "learning_rate": 0.0001, "loss": 0.0086, "step": 812 }, { "epoch": 101.99, "learning_rate": 0.0001, "loss": 0.0097, "step": 816 }, { "epoch": 102.5, "learning_rate": 0.0001, "loss": 0.011, "step": 820 }, { "epoch": 102.99, "learning_rate": 0.0001, "loss": 0.0085, "step": 824 }, { "epoch": 103.5, "learning_rate": 0.0001, "loss": 0.0086, "step": 828 }, { "epoch": 103.99, "learning_rate": 0.0001, "loss": 0.0094, "step": 832 }, { "epoch": 103.99, "eval_exact_match": 0.5183714001986097, "eval_exec": 0.5441906653426017, "eval_loss": 0.34131428599357605, "eval_runtime": 681.4487, "eval_samples_per_second": 1.908, "step": 832 }, { "epoch": 104.5, "learning_rate": 0.0001, "loss": 0.0111, "step": 836 }, { "epoch": 104.99, "learning_rate": 0.0001, "loss": 0.0095, "step": 840 }, { "epoch": 105.5, "learning_rate": 0.0001, "loss": 0.0083, "step": 844 }, { "epoch": 105.99, "learning_rate": 0.0001, "loss": 0.0083, "step": 848 }, { "epoch": 106.5, "learning_rate": 0.0001, "loss": 0.0085, "step": 852 }, { "epoch": 106.99, "learning_rate": 0.0001, "loss": 0.0086, "step": 856 }, { "epoch": 107.5, "learning_rate": 0.0001, "loss": 0.0085, "step": 860 }, { "epoch": 107.99, "learning_rate": 0.0001, "loss": 0.0088, "step": 864 }, { "epoch": 108.5, "learning_rate": 0.0001, "loss": 0.0094, "step": 868 }, { "epoch": 108.99, "learning_rate": 0.0001, "loss": 0.0084, "step": 872 }, { "epoch": 109.5, "learning_rate": 0.0001, "loss": 0.0075, "step": 876 }, { "epoch": 109.99, "learning_rate": 0.0001, "loss": 0.0093, "step": 880 }, { "epoch": 110.5, "learning_rate": 0.0001, "loss": 0.0091, "step": 884 }, { "epoch": 110.99, "learning_rate": 0.0001, "loss": 0.0088, "step": 888 }, { "epoch": 111.5, "learning_rate": 0.0001, "loss": 0.0083, "step": 892 }, { "epoch": 111.99, "learning_rate": 0.0001, "loss": 0.0086, "step": 896 }, { "epoch": 111.99, "eval_exact_match": 0.5243296921549155, "eval_exec": 0.5521350546176763, "eval_loss": 0.3550397753715515, "eval_runtime": 717.3828, "eval_samples_per_second": 1.812, "step": 896 }, { "epoch": 112.5, "learning_rate": 0.0001, "loss": 0.0086, "step": 900 }, { "epoch": 112.99, "learning_rate": 0.0001, "loss": 0.0085, "step": 904 }, { "epoch": 113.5, "learning_rate": 0.0001, "loss": 0.0076, "step": 908 }, { "epoch": 113.99, "learning_rate": 0.0001, "loss": 0.0093, "step": 912 }, { "epoch": 114.5, "learning_rate": 0.0001, "loss": 0.0068, "step": 916 }, { "epoch": 114.99, "learning_rate": 0.0001, "loss": 0.0075, "step": 920 }, { "epoch": 115.5, "learning_rate": 0.0001, "loss": 0.0073, "step": 924 }, { "epoch": 115.99, "learning_rate": 0.0001, "loss": 0.0071, "step": 928 }, { "epoch": 116.5, "learning_rate": 0.0001, "loss": 0.0077, "step": 932 }, { "epoch": 116.99, "learning_rate": 0.0001, "loss": 0.006, "step": 936 }, { "epoch": 117.5, "learning_rate": 0.0001, "loss": 0.0065, "step": 940 }, { "epoch": 117.99, "learning_rate": 0.0001, "loss": 0.008, "step": 944 }, { "epoch": 118.5, "learning_rate": 0.0001, "loss": 0.0069, "step": 948 }, { "epoch": 118.99, "learning_rate": 0.0001, "loss": 0.0082, "step": 952 }, { "epoch": 119.5, "learning_rate": 0.0001, "loss": 0.0058, "step": 956 }, { "epoch": 119.99, "learning_rate": 0.0001, "loss": 0.0072, "step": 960 }, { "epoch": 119.99, "eval_exact_match": 0.5104270109235353, "eval_exec": 0.5431976166832175, "eval_loss": 0.3599448800086975, "eval_runtime": 731.1991, "eval_samples_per_second": 1.778, "step": 960 }, { "epoch": 120.5, "learning_rate": 0.0001, "loss": 0.0062, "step": 964 }, { "epoch": 120.99, "learning_rate": 0.0001, "loss": 0.0066, "step": 968 }, { "epoch": 121.5, "learning_rate": 0.0001, "loss": 0.0054, "step": 972 }, { "epoch": 121.99, "learning_rate": 0.0001, "loss": 0.0055, "step": 976 }, { "epoch": 122.5, "learning_rate": 0.0001, "loss": 0.0054, "step": 980 }, { "epoch": 122.99, "learning_rate": 0.0001, "loss": 0.0066, "step": 984 }, { "epoch": 123.5, "learning_rate": 0.0001, "loss": 0.0068, "step": 988 }, { "epoch": 123.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 992 }, { "epoch": 124.5, "learning_rate": 0.0001, "loss": 0.0065, "step": 996 }, { "epoch": 124.99, "learning_rate": 0.0001, "loss": 0.0067, "step": 1000 }, { "epoch": 125.5, "learning_rate": 0.0001, "loss": 0.0061, "step": 1004 }, { "epoch": 125.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1008 }, { "epoch": 126.5, "learning_rate": 0.0001, "loss": 0.0056, "step": 1012 }, { "epoch": 126.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1016 }, { "epoch": 127.5, "learning_rate": 0.0001, "loss": 0.0062, "step": 1020 }, { "epoch": 127.99, "learning_rate": 0.0001, "loss": 0.006, "step": 1024 }, { "epoch": 127.99, "eval_exact_match": 0.5203574975173784, "eval_exec": 0.5501489572989077, "eval_loss": 0.39113467931747437, "eval_runtime": 888.0535, "eval_samples_per_second": 1.464, "step": 1024 }, { "epoch": 128.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1028 }, { "epoch": 128.99, "learning_rate": 0.0001, "loss": 0.0065, "step": 1032 }, { "epoch": 129.5, "learning_rate": 0.0001, "loss": 0.0047, "step": 1036 }, { "epoch": 129.99, "learning_rate": 0.0001, "loss": 0.005, "step": 1040 }, { "epoch": 130.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1044 }, { "epoch": 130.99, "learning_rate": 0.0001, "loss": 0.0053, "step": 1048 }, { "epoch": 131.5, "learning_rate": 0.0001, "loss": 0.0063, "step": 1052 }, { "epoch": 131.99, "learning_rate": 0.0001, "loss": 0.0095, "step": 1056 }, { "epoch": 132.5, "learning_rate": 0.0001, "loss": 0.0058, "step": 1060 }, { "epoch": 132.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1064 }, { "epoch": 133.5, "learning_rate": 0.0001, "loss": 0.0067, "step": 1068 }, { "epoch": 133.99, "learning_rate": 0.0001, "loss": 0.0056, "step": 1072 }, { "epoch": 134.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1076 }, { "epoch": 134.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1080 }, { "epoch": 135.5, "learning_rate": 0.0001, "loss": 0.0049, "step": 1084 }, { "epoch": 135.99, "learning_rate": 0.0001, "loss": 0.0054, "step": 1088 }, { "epoch": 135.99, "eval_exact_match": 0.5084409136047666, "eval_exec": 0.535253227408143, "eval_loss": 0.3826310336589813, "eval_runtime": 944.2579, "eval_samples_per_second": 1.377, "step": 1088 }, { "epoch": 136.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1092 }, { "epoch": 136.99, "learning_rate": 0.0001, "loss": 0.0048, "step": 1096 }, { "epoch": 137.5, "learning_rate": 0.0001, "loss": 0.0055, "step": 1100 }, { "epoch": 137.99, "learning_rate": 0.0001, "loss": 0.0048, "step": 1104 }, { "epoch": 138.5, "learning_rate": 0.0001, "loss": 0.0054, "step": 1108 }, { "epoch": 138.99, "learning_rate": 0.0001, "loss": 0.0051, "step": 1112 }, { "epoch": 139.5, "learning_rate": 0.0001, "loss": 0.0047, "step": 1116 }, { "epoch": 139.99, "learning_rate": 0.0001, "loss": 0.0045, "step": 1120 }, { "epoch": 140.5, "learning_rate": 0.0001, "loss": 0.0087, "step": 1124 }, { "epoch": 140.99, "learning_rate": 0.0001, "loss": 0.0043, "step": 1128 }, { "epoch": 141.5, "learning_rate": 0.0001, "loss": 0.0041, "step": 1132 }, { "epoch": 141.99, "learning_rate": 0.0001, "loss": 0.0047, "step": 1136 }, { "epoch": 142.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1140 }, { "epoch": 142.99, "learning_rate": 0.0001, "loss": 0.0042, "step": 1144 }, { "epoch": 143.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1148 }, { "epoch": 143.99, "learning_rate": 0.0001, "loss": 0.0052, "step": 1152 }, { "epoch": 143.99, "eval_exact_match": 0.506454816285998, "eval_exec": 0.5402184707050646, "eval_loss": 0.39830923080444336, "eval_runtime": 975.7311, "eval_samples_per_second": 1.332, "step": 1152 }, { "epoch": 144.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1156 }, { "epoch": 144.99, "learning_rate": 0.0001, "loss": 0.0051, "step": 1160 }, { "epoch": 145.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1164 }, { "epoch": 145.99, "learning_rate": 0.0001, "loss": 0.004, "step": 1168 }, { "epoch": 146.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1172 }, { "epoch": 146.99, "learning_rate": 0.0001, "loss": 0.0041, "step": 1176 }, { "epoch": 147.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1180 }, { "epoch": 147.99, "learning_rate": 0.0001, "loss": 0.0043, "step": 1184 }, { "epoch": 148.5, "learning_rate": 0.0001, "loss": 0.0041, "step": 1188 }, { "epoch": 148.99, "learning_rate": 0.0001, "loss": 0.0048, "step": 1192 }, { "epoch": 149.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1196 }, { "epoch": 149.99, "learning_rate": 0.0001, "loss": 0.004, "step": 1200 }, { "epoch": 150.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1204 }, { "epoch": 150.99, "learning_rate": 0.0001, "loss": 0.0063, "step": 1208 }, { "epoch": 151.5, "learning_rate": 0.0001, "loss": 0.0043, "step": 1212 }, { "epoch": 151.99, "learning_rate": 0.0001, "loss": 0.0084, "step": 1216 }, { "epoch": 151.99, "eval_exact_match": 0.44885799404170806, "eval_exec": 0.5014895729890765, "eval_loss": 0.34375911951065063, "eval_runtime": 974.1005, "eval_samples_per_second": 1.335, "step": 1216 }, { "epoch": 152.5, "learning_rate": 0.0001, "loss": 0.0114, "step": 1220 }, { "epoch": 152.99, "learning_rate": 0.0001, "loss": 0.0044, "step": 1224 }, { "epoch": 153.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1228 }, { "epoch": 153.99, "learning_rate": 0.0001, "loss": 0.0045, "step": 1232 }, { "epoch": 154.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1236 }, { "epoch": 154.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1240 }, { "epoch": 155.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 1244 }, { "epoch": 155.99, "learning_rate": 0.0001, "loss": 0.0028, "step": 1248 }, { "epoch": 156.5, "learning_rate": 0.0001, "loss": 0.0039, "step": 1252 }, { "epoch": 156.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1256 }, { "epoch": 157.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 1260 }, { "epoch": 157.99, "learning_rate": 0.0001, "loss": 0.0028, "step": 1264 }, { "epoch": 158.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1268 }, { "epoch": 158.99, "learning_rate": 0.0001, "loss": 0.0034, "step": 1272 }, { "epoch": 159.5, "learning_rate": 0.0001, "loss": 0.0033, "step": 1276 }, { "epoch": 159.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1280 }, { "epoch": 159.99, "eval_exact_match": 0.5153922542204568, "eval_exec": 0.5441906653426017, "eval_loss": 0.41516825556755066, "eval_runtime": 976.5716, "eval_samples_per_second": 1.331, "step": 1280 }, { "epoch": 160.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1284 }, { "epoch": 160.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 1288 }, { "epoch": 161.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 1292 }, { "epoch": 161.99, "learning_rate": 0.0001, "loss": 0.0039, "step": 1296 }, { "epoch": 162.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1300 }, { "epoch": 162.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1304 }, { "epoch": 163.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1308 }, { "epoch": 163.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1312 }, { "epoch": 164.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1316 }, { "epoch": 164.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1320 }, { "epoch": 165.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1324 }, { "epoch": 165.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 1328 }, { "epoch": 166.5, "learning_rate": 0.0001, "loss": 0.0039, "step": 1332 }, { "epoch": 166.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 1336 }, { "epoch": 167.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 1340 }, { "epoch": 167.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 1344 }, { "epoch": 167.99, "eval_exact_match": 0.5143992055610725, "eval_exec": 0.5382323733862959, "eval_loss": 0.4463094472885132, "eval_runtime": 955.823, "eval_samples_per_second": 1.36, "step": 1344 }, { "epoch": 168.5, "learning_rate": 0.0001, "loss": 0.003, "step": 1348 }, { "epoch": 168.99, "learning_rate": 0.0001, "loss": 0.0049, "step": 1352 }, { "epoch": 169.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1356 }, { "epoch": 169.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1360 }, { "epoch": 170.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1364 }, { "epoch": 170.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1368 }, { "epoch": 171.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 1372 }, { "epoch": 171.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 1376 }, { "epoch": 172.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1380 }, { "epoch": 172.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 1384 }, { "epoch": 173.5, "learning_rate": 0.0001, "loss": 0.003, "step": 1388 }, { "epoch": 173.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1392 }, { "epoch": 174.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1396 }, { "epoch": 174.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 1400 }, { "epoch": 175.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1404 }, { "epoch": 175.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 1408 }, { "epoch": 175.99, "eval_exact_match": 0.5074478649453823, "eval_exec": 0.5342601787487586, "eval_loss": 0.4188894033432007, "eval_runtime": 941.4278, "eval_samples_per_second": 1.381, "step": 1408 }, { "epoch": 176.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1412 }, { "epoch": 176.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 1416 }, { "epoch": 177.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 1420 }, { "epoch": 177.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 1424 }, { "epoch": 178.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 1428 }, { "epoch": 178.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1432 }, { "epoch": 179.5, "learning_rate": 0.0001, "loss": 0.0057, "step": 1436 }, { "epoch": 179.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 1440 }, { "epoch": 180.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 1444 }, { "epoch": 180.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 1448 }, { "epoch": 181.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1452 }, { "epoch": 181.99, "learning_rate": 0.0001, "loss": 0.003, "step": 1456 }, { "epoch": 182.5, "learning_rate": 0.0001, "loss": 0.0049, "step": 1460 }, { "epoch": 182.99, "learning_rate": 0.0001, "loss": 0.0051, "step": 1464 }, { "epoch": 183.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 1468 }, { "epoch": 183.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 1472 }, { "epoch": 183.99, "eval_exact_match": 0.519364448857994, "eval_exec": 0.5441906653426017, "eval_loss": 0.422220915555954, "eval_runtime": 973.3607, "eval_samples_per_second": 1.336, "step": 1472 }, { "epoch": 184.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 1476 }, { "epoch": 184.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1480 }, { "epoch": 185.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 1484 }, { "epoch": 185.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 1488 }, { "epoch": 186.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1492 }, { "epoch": 186.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1496 }, { "epoch": 187.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 1500 }, { "epoch": 187.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 1504 }, { "epoch": 188.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 1508 }, { "epoch": 188.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1512 }, { "epoch": 189.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1516 }, { "epoch": 189.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 1520 }, { "epoch": 190.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 1524 }, { "epoch": 190.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 1528 }, { "epoch": 191.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 1532 }, { "epoch": 191.99, "learning_rate": 0.0001, "loss": 0.0034, "step": 1536 }, { "epoch": 191.99, "eval_exact_match": 0.5203574975173784, "eval_exec": 0.5491559086395233, "eval_loss": 0.44573745131492615, "eval_runtime": 974.7523, "eval_samples_per_second": 1.334, "step": 1536 }, { "epoch": 192.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 1540 }, { "epoch": 192.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 1544 }, { "epoch": 193.5, "learning_rate": 0.0001, "loss": 0.003, "step": 1548 }, { "epoch": 193.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1552 }, { "epoch": 194.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 1556 }, { "epoch": 194.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1560 }, { "epoch": 195.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1564 }, { "epoch": 195.99, "learning_rate": 0.0001, "loss": 0.002, "step": 1568 }, { "epoch": 196.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 1572 }, { "epoch": 196.99, "learning_rate": 0.0001, "loss": 0.002, "step": 1576 }, { "epoch": 197.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 1580 }, { "epoch": 197.99, "learning_rate": 0.0001, "loss": 0.003, "step": 1584 }, { "epoch": 198.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1588 }, { "epoch": 198.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1592 }, { "epoch": 199.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1596 }, { "epoch": 199.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1600 }, { "epoch": 199.99, "eval_exact_match": 0.5114200595829196, "eval_exec": 0.5521350546176763, "eval_loss": 0.4328438639640808, "eval_runtime": 959.4133, "eval_samples_per_second": 1.355, "step": 1600 }, { "epoch": 200.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1604 }, { "epoch": 200.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1608 }, { "epoch": 201.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1612 }, { "epoch": 201.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 1616 }, { "epoch": 202.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 1620 }, { "epoch": 202.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 1624 }, { "epoch": 203.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 1628 }, { "epoch": 203.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1632 }, { "epoch": 204.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1636 }, { "epoch": 204.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 1640 }, { "epoch": 205.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 1644 }, { "epoch": 205.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 1648 }, { "epoch": 206.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1652 }, { "epoch": 206.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1656 }, { "epoch": 207.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 1660 }, { "epoch": 207.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1664 }, { "epoch": 207.99, "eval_exact_match": 0.5153922542204568, "eval_exec": 0.5422045680238332, "eval_loss": 0.46325209736824036, "eval_runtime": 948.9363, "eval_samples_per_second": 1.37, "step": 1664 }, { "epoch": 208.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 1668 }, { "epoch": 208.99, "learning_rate": 0.0001, "loss": 0.003, "step": 1672 }, { "epoch": 209.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 1676 }, { "epoch": 209.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 1680 }, { "epoch": 210.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 1684 }, { "epoch": 210.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 1688 }, { "epoch": 211.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 1692 }, { "epoch": 211.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 1696 }, { "epoch": 212.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 1700 }, { "epoch": 212.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1704 }, { "epoch": 213.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1708 }, { "epoch": 213.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 1712 }, { "epoch": 214.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1716 }, { "epoch": 214.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 1720 }, { "epoch": 215.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 1724 }, { "epoch": 215.99, "learning_rate": 0.0001, "loss": 0.002, "step": 1728 }, { "epoch": 215.99, "eval_exact_match": 0.5134061569016882, "eval_exec": 0.551142005958292, "eval_loss": 0.4671519100666046, "eval_runtime": 968.39, "eval_samples_per_second": 1.342, "step": 1728 }, { "epoch": 216.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1732 }, { "epoch": 216.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 1736 }, { "epoch": 217.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1740 }, { "epoch": 217.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 1744 }, { "epoch": 218.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 1748 }, { "epoch": 218.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 1752 }, { "epoch": 219.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1756 }, { "epoch": 219.99, "learning_rate": 0.0001, "loss": 0.002, "step": 1760 }, { "epoch": 220.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 1764 }, { "epoch": 220.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 1768 }, { "epoch": 221.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 1772 }, { "epoch": 221.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1776 }, { "epoch": 222.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 1780 }, { "epoch": 222.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 1784 }, { "epoch": 223.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 1788 }, { "epoch": 223.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 1792 }, { "epoch": 223.99, "eval_exact_match": 0.5143992055610725, "eval_exec": 0.5402184707050646, "eval_loss": 0.4584057629108429, "eval_runtime": 959.2229, "eval_samples_per_second": 1.355, "step": 1792 }, { "epoch": 224.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 1796 }, { "epoch": 224.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 1800 }, { "epoch": 225.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 1804 }, { "epoch": 225.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 1808 }, { "epoch": 226.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 1812 }, { "epoch": 226.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1816 }, { "epoch": 227.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 1820 }, { "epoch": 227.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 1824 }, { "epoch": 228.5, "learning_rate": 0.0001, "loss": 0.002, "step": 1828 }, { "epoch": 228.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 1832 }, { "epoch": 229.5, "learning_rate": 0.0001, "loss": 0.0063, "step": 1836 }, { "epoch": 229.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 1840 }, { "epoch": 230.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 1844 }, { "epoch": 230.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1848 }, { "epoch": 231.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 1852 }, { "epoch": 231.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 1856 }, { "epoch": 231.99, "eval_exact_match": 0.5253227408142999, "eval_exec": 0.5561072492552135, "eval_loss": 0.44773775339126587, "eval_runtime": 980.451, "eval_samples_per_second": 1.326, "step": 1856 } ], "max_steps": 24576, "num_train_epochs": 3072, "total_flos": 7.797838246859244e+18, "trial_name": null, "trial_params": null }