{ "best_metric": 0.7117988394584139, "best_model_checkpoint": "/experiments/spider/d6368230-e8a6-4e57-923c-f0938a9bcf7b/checkpoint-2624", "epoch": 874.5844748858448, "global_step": 2624, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 0.0001, "loss": 3.2183, "step": 1 }, { "epoch": 1.29, "learning_rate": 0.0001, "loss": 2.451, "step": 4 }, { "epoch": 2.58, "learning_rate": 0.0001, "loss": 1.2943, "step": 8 }, { "epoch": 3.88, "learning_rate": 0.0001, "loss": 0.8542, "step": 12 }, { "epoch": 5.29, "learning_rate": 0.0001, "loss": 0.7239, "step": 16 }, { "epoch": 6.58, "learning_rate": 0.0001, "loss": 0.5427, "step": 20 }, { "epoch": 7.88, "learning_rate": 0.0001, "loss": 0.4532, "step": 24 }, { "epoch": 9.29, "learning_rate": 0.0001, "loss": 0.4275, "step": 28 }, { "epoch": 10.58, "learning_rate": 0.0001, "loss": 0.3646, "step": 32 }, { "epoch": 11.88, "learning_rate": 0.0001, "loss": 0.3155, "step": 36 }, { "epoch": 13.29, "learning_rate": 0.0001, "loss": 0.3455, "step": 40 }, { "epoch": 14.58, "learning_rate": 0.0001, "loss": 0.2804, "step": 44 }, { "epoch": 15.88, "learning_rate": 0.0001, "loss": 0.2518, "step": 48 }, { "epoch": 17.29, "learning_rate": 0.0001, "loss": 0.248, "step": 52 }, { "epoch": 18.58, "learning_rate": 0.0001, "loss": 0.2021, "step": 56 }, { "epoch": 19.88, "learning_rate": 0.0001, "loss": 0.212, "step": 60 }, { "epoch": 21.29, "learning_rate": 0.0001, "loss": 0.203, "step": 64 }, { "epoch": 21.29, "eval_exact_match": 0.42359767891682787, "eval_exec": 0.4284332688588008, "eval_loss": 0.1833169162273407, "eval_runtime": 682.1232, "eval_samples_per_second": 1.516, "step": 64 }, { "epoch": 22.58, "learning_rate": 0.0001, "loss": 0.1895, "step": 68 }, { "epoch": 23.88, "learning_rate": 0.0001, "loss": 0.166, "step": 72 }, { "epoch": 25.29, "learning_rate": 0.0001, "loss": 0.1833, "step": 76 }, { "epoch": 26.58, "learning_rate": 0.0001, "loss": 0.1503, "step": 80 }, { "epoch": 27.88, "learning_rate": 0.0001, "loss": 0.1485, "step": 84 }, { "epoch": 29.29, "learning_rate": 0.0001, "loss": 0.1574, "step": 88 }, { "epoch": 30.58, "learning_rate": 0.0001, "loss": 0.1295, "step": 92 }, { "epoch": 31.88, "learning_rate": 0.0001, "loss": 0.1199, "step": 96 }, { "epoch": 33.29, "learning_rate": 0.0001, "loss": 0.1262, "step": 100 }, { "epoch": 34.58, "learning_rate": 0.0001, "loss": 0.1153, "step": 104 }, { "epoch": 35.88, "learning_rate": 0.0001, "loss": 0.1067, "step": 108 }, { "epoch": 37.29, "learning_rate": 0.0001, "loss": 0.1086, "step": 112 }, { "epoch": 38.58, "learning_rate": 0.0001, "loss": 0.0934, "step": 116 }, { "epoch": 39.88, "learning_rate": 0.0001, "loss": 0.096, "step": 120 }, { "epoch": 41.29, "learning_rate": 0.0001, "loss": 0.1075, "step": 124 }, { "epoch": 42.58, "learning_rate": 0.0001, "loss": 0.0984, "step": 128 }, { "epoch": 42.58, "eval_exact_match": 0.5938104448742747, "eval_exec": 0.5764023210831721, "eval_loss": 0.16683875024318695, "eval_runtime": 703.3328, "eval_samples_per_second": 1.47, "step": 128 }, { "epoch": 43.88, "learning_rate": 0.0001, "loss": 0.0842, "step": 132 }, { "epoch": 45.29, "learning_rate": 0.0001, "loss": 0.0954, "step": 136 }, { "epoch": 46.58, "learning_rate": 0.0001, "loss": 0.0842, "step": 140 }, { "epoch": 47.88, "learning_rate": 0.0001, "loss": 0.0724, "step": 144 }, { "epoch": 49.29, "learning_rate": 0.0001, "loss": 0.0805, "step": 148 }, { "epoch": 50.58, "learning_rate": 0.0001, "loss": 0.0745, "step": 152 }, { "epoch": 51.88, "learning_rate": 0.0001, "loss": 0.0685, "step": 156 }, { "epoch": 53.29, "learning_rate": 0.0001, "loss": 0.0709, "step": 160 }, { "epoch": 54.58, "learning_rate": 0.0001, "loss": 0.0744, "step": 164 }, { "epoch": 55.88, "learning_rate": 0.0001, "loss": 0.0603, "step": 168 }, { "epoch": 57.29, "learning_rate": 0.0001, "loss": 0.0607, "step": 172 }, { "epoch": 58.58, "learning_rate": 0.0001, "loss": 0.0652, "step": 176 }, { "epoch": 59.88, "learning_rate": 0.0001, "loss": 0.0592, "step": 180 }, { "epoch": 61.29, "learning_rate": 0.0001, "loss": 0.0584, "step": 184 }, { "epoch": 62.58, "learning_rate": 0.0001, "loss": 0.0507, "step": 188 }, { "epoch": 63.88, "learning_rate": 0.0001, "loss": 0.0523, "step": 192 }, { "epoch": 63.88, "eval_exact_match": 0.6266924564796905, "eval_exec": 0.620889748549323, "eval_loss": 0.16735364496707916, "eval_runtime": 843.3561, "eval_samples_per_second": 1.226, "step": 192 }, { "epoch": 65.29, "learning_rate": 0.0001, "loss": 0.0576, "step": 196 }, { "epoch": 66.58, "learning_rate": 0.0001, "loss": 0.0539, "step": 200 }, { "epoch": 67.88, "learning_rate": 0.0001, "loss": 0.0443, "step": 204 }, { "epoch": 69.29, "learning_rate": 0.0001, "loss": 0.0518, "step": 208 }, { "epoch": 70.58, "learning_rate": 0.0001, "loss": 0.0435, "step": 212 }, { "epoch": 71.88, "learning_rate": 0.0001, "loss": 0.0428, "step": 216 }, { "epoch": 73.29, "learning_rate": 0.0001, "loss": 0.043, "step": 220 }, { "epoch": 74.58, "learning_rate": 0.0001, "loss": 0.0383, "step": 224 }, { "epoch": 75.88, "learning_rate": 0.0001, "loss": 0.0399, "step": 228 }, { "epoch": 77.29, "learning_rate": 0.0001, "loss": 0.0422, "step": 232 }, { "epoch": 78.58, "learning_rate": 0.0001, "loss": 0.0357, "step": 236 }, { "epoch": 79.88, "learning_rate": 0.0001, "loss": 0.0346, "step": 240 }, { "epoch": 81.29, "learning_rate": 0.0001, "loss": 0.0433, "step": 244 }, { "epoch": 82.58, "learning_rate": 0.0001, "loss": 0.0339, "step": 248 }, { "epoch": 83.88, "learning_rate": 0.0001, "loss": 0.0353, "step": 252 }, { "epoch": 85.29, "learning_rate": 0.0001, "loss": 0.0326, "step": 256 }, { "epoch": 85.29, "eval_exact_match": 0.6586073500967118, "eval_exec": 0.6450676982591876, "eval_loss": 0.21498563885688782, "eval_runtime": 824.5584, "eval_samples_per_second": 1.254, "step": 256 }, { "epoch": 86.58, "learning_rate": 0.0001, "loss": 0.0301, "step": 260 }, { "epoch": 87.88, "learning_rate": 0.0001, "loss": 0.0302, "step": 264 }, { "epoch": 89.29, "learning_rate": 0.0001, "loss": 0.0322, "step": 268 }, { "epoch": 90.58, "learning_rate": 0.0001, "loss": 0.0248, "step": 272 }, { "epoch": 91.88, "learning_rate": 0.0001, "loss": 0.026, "step": 276 }, { "epoch": 93.29, "learning_rate": 0.0001, "loss": 0.0297, "step": 280 }, { "epoch": 94.58, "learning_rate": 0.0001, "loss": 0.0276, "step": 284 }, { "epoch": 95.88, "learning_rate": 0.0001, "loss": 0.0253, "step": 288 }, { "epoch": 97.29, "learning_rate": 0.0001, "loss": 0.0282, "step": 292 }, { "epoch": 98.58, "learning_rate": 0.0001, "loss": 0.0252, "step": 296 }, { "epoch": 99.88, "learning_rate": 0.0001, "loss": 0.0234, "step": 300 }, { "epoch": 101.29, "learning_rate": 0.0001, "loss": 0.0225, "step": 304 }, { "epoch": 102.58, "learning_rate": 0.0001, "loss": 0.0254, "step": 308 }, { "epoch": 103.88, "learning_rate": 0.0001, "loss": 0.0201, "step": 312 }, { "epoch": 105.29, "learning_rate": 0.0001, "loss": 0.022, "step": 316 }, { "epoch": 106.58, "learning_rate": 0.0001, "loss": 0.0206, "step": 320 }, { "epoch": 106.58, "eval_exact_match": 0.6644100580270793, "eval_exec": 0.6499032882011605, "eval_loss": 0.21441736817359924, "eval_runtime": 835.4598, "eval_samples_per_second": 1.238, "step": 320 }, { "epoch": 107.88, "learning_rate": 0.0001, "loss": 0.0178, "step": 324 }, { "epoch": 109.29, "learning_rate": 0.0001, "loss": 0.0172, "step": 328 }, { "epoch": 110.58, "learning_rate": 0.0001, "loss": 0.0175, "step": 332 }, { "epoch": 111.88, "learning_rate": 0.0001, "loss": 0.0155, "step": 336 }, { "epoch": 113.29, "learning_rate": 0.0001, "loss": 0.0187, "step": 340 }, { "epoch": 114.58, "learning_rate": 0.0001, "loss": 0.0142, "step": 344 }, { "epoch": 115.88, "learning_rate": 0.0001, "loss": 0.0149, "step": 348 }, { "epoch": 117.29, "learning_rate": 0.0001, "loss": 0.0155, "step": 352 }, { "epoch": 118.58, "learning_rate": 0.0001, "loss": 0.0148, "step": 356 }, { "epoch": 119.88, "learning_rate": 0.0001, "loss": 0.0138, "step": 360 }, { "epoch": 121.29, "learning_rate": 0.0001, "loss": 0.0154, "step": 364 }, { "epoch": 122.58, "learning_rate": 0.0001, "loss": 0.0145, "step": 368 }, { "epoch": 123.88, "learning_rate": 0.0001, "loss": 0.0172, "step": 372 }, { "epoch": 125.29, "learning_rate": 0.0001, "loss": 0.0146, "step": 376 }, { "epoch": 126.58, "learning_rate": 0.0001, "loss": 0.0156, "step": 380 }, { "epoch": 127.88, "learning_rate": 0.0001, "loss": 0.0177, "step": 384 }, { "epoch": 127.88, "eval_exact_match": 0.6682785299806576, "eval_exec": 0.6518375241779497, "eval_loss": 0.24249066412448883, "eval_runtime": 822.9024, "eval_samples_per_second": 1.257, "step": 384 }, { "epoch": 129.29, "learning_rate": 0.0001, "loss": 0.0134, "step": 388 }, { "epoch": 130.58, "learning_rate": 0.0001, "loss": 0.0119, "step": 392 }, { "epoch": 131.88, "learning_rate": 0.0001, "loss": 0.01, "step": 396 }, { "epoch": 133.29, "learning_rate": 0.0001, "loss": 0.0105, "step": 400 }, { "epoch": 134.58, "learning_rate": 0.0001, "loss": 0.0086, "step": 404 }, { "epoch": 135.88, "learning_rate": 0.0001, "loss": 0.0118, "step": 408 }, { "epoch": 137.29, "learning_rate": 0.0001, "loss": 0.01, "step": 412 }, { "epoch": 138.58, "learning_rate": 0.0001, "loss": 0.0102, "step": 416 }, { "epoch": 139.88, "learning_rate": 0.0001, "loss": 0.0106, "step": 420 }, { "epoch": 141.29, "learning_rate": 0.0001, "loss": 0.0087, "step": 424 }, { "epoch": 142.58, "learning_rate": 0.0001, "loss": 0.0089, "step": 428 }, { "epoch": 143.88, "learning_rate": 0.0001, "loss": 0.0099, "step": 432 }, { "epoch": 145.29, "learning_rate": 0.0001, "loss": 0.0092, "step": 436 }, { "epoch": 146.58, "learning_rate": 0.0001, "loss": 0.0081, "step": 440 }, { "epoch": 147.88, "learning_rate": 0.0001, "loss": 0.0088, "step": 444 }, { "epoch": 149.29, "learning_rate": 0.0001, "loss": 0.0079, "step": 448 }, { "epoch": 149.29, "eval_exact_match": 0.6499032882011605, "eval_exec": 0.6353965183752418, "eval_loss": 0.2822052538394928, "eval_runtime": 813.617, "eval_samples_per_second": 1.271, "step": 448 }, { "epoch": 150.58, "learning_rate": 0.0001, "loss": 0.0077, "step": 452 }, { "epoch": 151.88, "learning_rate": 0.0001, "loss": 0.0075, "step": 456 }, { "epoch": 153.29, "learning_rate": 0.0001, "loss": 0.0071, "step": 460 }, { "epoch": 154.58, "learning_rate": 0.0001, "loss": 0.008, "step": 464 }, { "epoch": 155.88, "learning_rate": 0.0001, "loss": 0.0063, "step": 468 }, { "epoch": 157.29, "learning_rate": 0.0001, "loss": 0.0078, "step": 472 }, { "epoch": 158.58, "learning_rate": 0.0001, "loss": 0.0069, "step": 476 }, { "epoch": 159.88, "learning_rate": 0.0001, "loss": 0.0073, "step": 480 }, { "epoch": 161.29, "learning_rate": 0.0001, "loss": 0.0111, "step": 484 }, { "epoch": 162.58, "learning_rate": 0.0001, "loss": 0.0063, "step": 488 }, { "epoch": 163.88, "learning_rate": 0.0001, "loss": 0.0078, "step": 492 }, { "epoch": 165.29, "learning_rate": 0.0001, "loss": 0.0121, "step": 496 }, { "epoch": 166.58, "learning_rate": 0.0001, "loss": 0.0086, "step": 500 }, { "epoch": 167.88, "learning_rate": 0.0001, "loss": 0.0066, "step": 504 }, { "epoch": 169.29, "learning_rate": 0.0001, "loss": 0.0064, "step": 508 }, { "epoch": 170.58, "learning_rate": 0.0001, "loss": 0.0046, "step": 512 }, { "epoch": 170.58, "eval_exact_match": 0.660541586073501, "eval_exec": 0.6479690522243714, "eval_loss": 0.29858535528182983, "eval_runtime": 852.1163, "eval_samples_per_second": 1.213, "step": 512 }, { "epoch": 171.88, "learning_rate": 0.0001, "loss": 0.0051, "step": 516 }, { "epoch": 173.29, "learning_rate": 0.0001, "loss": 0.0053, "step": 520 }, { "epoch": 174.58, "learning_rate": 0.0001, "loss": 0.0054, "step": 524 }, { "epoch": 175.88, "learning_rate": 0.0001, "loss": 0.0052, "step": 528 }, { "epoch": 177.29, "learning_rate": 0.0001, "loss": 0.0051, "step": 532 }, { "epoch": 178.58, "learning_rate": 0.0001, "loss": 0.0043, "step": 536 }, { "epoch": 179.88, "learning_rate": 0.0001, "loss": 0.0047, "step": 540 }, { "epoch": 181.29, "learning_rate": 0.0001, "loss": 0.0051, "step": 544 }, { "epoch": 182.58, "learning_rate": 0.0001, "loss": 0.0164, "step": 548 }, { "epoch": 183.88, "learning_rate": 0.0001, "loss": 0.0044, "step": 552 }, { "epoch": 185.29, "learning_rate": 0.0001, "loss": 0.0048, "step": 556 }, { "epoch": 186.58, "learning_rate": 0.0001, "loss": 0.0038, "step": 560 }, { "epoch": 187.88, "learning_rate": 0.0001, "loss": 0.0033, "step": 564 }, { "epoch": 189.29, "learning_rate": 0.0001, "loss": 0.0041, "step": 568 }, { "epoch": 190.58, "learning_rate": 0.0001, "loss": 0.0038, "step": 572 }, { "epoch": 191.88, "learning_rate": 0.0001, "loss": 0.0044, "step": 576 }, { "epoch": 191.88, "eval_exact_match": 0.6663442940038685, "eval_exec": 0.6479690522243714, "eval_loss": 0.3111841380596161, "eval_runtime": 837.4186, "eval_samples_per_second": 1.235, "step": 576 }, { "epoch": 193.29, "learning_rate": 0.0001, "loss": 0.0045, "step": 580 }, { "epoch": 194.58, "learning_rate": 0.0001, "loss": 0.0027, "step": 584 }, { "epoch": 195.88, "learning_rate": 0.0001, "loss": 0.0024, "step": 588 }, { "epoch": 197.29, "learning_rate": 0.0001, "loss": 0.0034, "step": 592 }, { "epoch": 198.58, "learning_rate": 0.0001, "loss": 0.0037, "step": 596 }, { "epoch": 199.88, "learning_rate": 0.0001, "loss": 0.0037, "step": 600 }, { "epoch": 201.29, "learning_rate": 0.0001, "loss": 0.0036, "step": 604 }, { "epoch": 202.58, "learning_rate": 0.0001, "loss": 0.0029, "step": 608 }, { "epoch": 203.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 612 }, { "epoch": 205.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 616 }, { "epoch": 206.58, "learning_rate": 0.0001, "loss": 0.0038, "step": 620 }, { "epoch": 207.88, "learning_rate": 0.0001, "loss": 0.003, "step": 624 }, { "epoch": 209.29, "learning_rate": 0.0001, "loss": 0.0033, "step": 628 }, { "epoch": 210.58, "learning_rate": 0.0001, "loss": 0.0028, "step": 632 }, { "epoch": 211.88, "learning_rate": 0.0001, "loss": 0.0028, "step": 636 }, { "epoch": 213.29, "learning_rate": 0.0001, "loss": 0.0025, "step": 640 }, { "epoch": 213.29, "eval_exact_match": 0.660541586073501, "eval_exec": 0.648936170212766, "eval_loss": 0.3557709753513336, "eval_runtime": 807.1499, "eval_samples_per_second": 1.281, "step": 640 }, { "epoch": 214.58, "learning_rate": 0.0001, "loss": 0.0024, "step": 644 }, { "epoch": 215.88, "learning_rate": 0.0001, "loss": 0.0034, "step": 648 }, { "epoch": 217.29, "learning_rate": 0.0001, "loss": 0.0023, "step": 652 }, { "epoch": 218.58, "learning_rate": 0.0001, "loss": 0.0021, "step": 656 }, { "epoch": 219.88, "learning_rate": 0.0001, "loss": 0.0026, "step": 660 }, { "epoch": 221.29, "learning_rate": 0.0001, "loss": 0.0035, "step": 664 }, { "epoch": 222.58, "learning_rate": 0.0001, "loss": 0.0022, "step": 668 }, { "epoch": 223.88, "learning_rate": 0.0001, "loss": 0.002, "step": 672 }, { "epoch": 225.29, "learning_rate": 0.0001, "loss": 0.0029, "step": 676 }, { "epoch": 226.58, "learning_rate": 0.0001, "loss": 0.0029, "step": 680 }, { "epoch": 227.88, "learning_rate": 0.0001, "loss": 0.0058, "step": 684 }, { "epoch": 229.29, "learning_rate": 0.0001, "loss": 0.0085, "step": 688 }, { "epoch": 230.58, "learning_rate": 0.0001, "loss": 0.0049, "step": 692 }, { "epoch": 231.88, "learning_rate": 0.0001, "loss": 0.0026, "step": 696 }, { "epoch": 233.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 700 }, { "epoch": 234.58, "learning_rate": 0.0001, "loss": 0.0026, "step": 704 }, { "epoch": 234.58, "eval_exact_match": 0.6682785299806576, "eval_exec": 0.6528046421663443, "eval_loss": 0.34394705295562744, "eval_runtime": 890.2876, "eval_samples_per_second": 1.161, "step": 704 }, { "epoch": 235.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 708 }, { "epoch": 237.29, "learning_rate": 0.0001, "loss": 0.003, "step": 712 }, { "epoch": 238.58, "learning_rate": 0.0001, "loss": 0.0027, "step": 716 }, { "epoch": 239.88, "learning_rate": 0.0001, "loss": 0.0021, "step": 720 }, { "epoch": 241.29, "learning_rate": 0.0001, "loss": 0.0023, "step": 724 }, { "epoch": 242.58, "learning_rate": 0.0001, "loss": 0.0017, "step": 728 }, { "epoch": 243.88, "learning_rate": 0.0001, "loss": 0.0018, "step": 732 }, { "epoch": 245.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 736 }, { "epoch": 246.58, "learning_rate": 0.0001, "loss": 0.0058, "step": 740 }, { "epoch": 247.88, "learning_rate": 0.0001, "loss": 0.0021, "step": 744 }, { "epoch": 249.29, "learning_rate": 0.0001, "loss": 0.0023, "step": 748 }, { "epoch": 250.58, "learning_rate": 0.0001, "loss": 0.003, "step": 752 }, { "epoch": 251.88, "learning_rate": 0.0001, "loss": 0.0027, "step": 756 }, { "epoch": 253.29, "learning_rate": 0.0001, "loss": 0.0029, "step": 760 }, { "epoch": 254.58, "learning_rate": 0.0001, "loss": 0.0023, "step": 764 }, { "epoch": 255.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 768 }, { "epoch": 255.88, "eval_exact_match": 0.6702127659574468, "eval_exec": 0.6576402321083172, "eval_loss": 0.3505505919456482, "eval_runtime": 849.5769, "eval_samples_per_second": 1.217, "step": 768 }, { "epoch": 257.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 772 }, { "epoch": 258.58, "learning_rate": 0.0001, "loss": 0.0017, "step": 776 }, { "epoch": 259.88, "learning_rate": 0.0001, "loss": 0.0023, "step": 780 }, { "epoch": 261.29, "learning_rate": 0.0001, "loss": 0.0031, "step": 784 }, { "epoch": 262.58, "learning_rate": 0.0001, "loss": 0.0015, "step": 788 }, { "epoch": 263.88, "learning_rate": 0.0001, "loss": 0.0018, "step": 792 }, { "epoch": 265.29, "learning_rate": 0.0001, "loss": 0.0019, "step": 796 }, { "epoch": 266.58, "learning_rate": 0.0001, "loss": 0.0017, "step": 800 }, { "epoch": 267.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 804 }, { "epoch": 269.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 808 }, { "epoch": 270.58, "learning_rate": 0.0001, "loss": 0.0021, "step": 812 }, { "epoch": 271.88, "learning_rate": 0.0001, "loss": 0.0022, "step": 816 }, { "epoch": 273.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 820 }, { "epoch": 274.58, "learning_rate": 0.0001, "loss": 0.0016, "step": 824 }, { "epoch": 275.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 828 }, { "epoch": 277.29, "learning_rate": 0.0001, "loss": 0.0017, "step": 832 }, { "epoch": 277.29, "eval_exact_match": 0.6789168278529981, "eval_exec": 0.6615087040618955, "eval_loss": 0.36397507786750793, "eval_runtime": 827.3744, "eval_samples_per_second": 1.25, "step": 832 }, { "epoch": 278.58, "learning_rate": 0.0001, "loss": 0.0016, "step": 836 }, { "epoch": 279.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 840 }, { "epoch": 281.29, "learning_rate": 0.0001, "loss": 0.002, "step": 844 }, { "epoch": 282.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 848 }, { "epoch": 283.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 852 }, { "epoch": 285.29, "learning_rate": 0.0001, "loss": 0.0017, "step": 856 }, { "epoch": 286.58, "learning_rate": 0.0001, "loss": 0.0016, "step": 860 }, { "epoch": 287.88, "learning_rate": 0.0001, "loss": 0.0075, "step": 864 }, { "epoch": 289.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 868 }, { "epoch": 290.58, "learning_rate": 0.0001, "loss": 0.0013, "step": 872 }, { "epoch": 291.88, "learning_rate": 0.0001, "loss": 0.0021, "step": 876 }, { "epoch": 293.29, "learning_rate": 0.0001, "loss": 0.002, "step": 880 }, { "epoch": 294.58, "learning_rate": 0.0001, "loss": 0.0012, "step": 884 }, { "epoch": 295.88, "learning_rate": 0.0001, "loss": 0.0015, "step": 888 }, { "epoch": 297.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 892 }, { "epoch": 298.58, "learning_rate": 0.0001, "loss": 0.0012, "step": 896 }, { "epoch": 298.58, "eval_exact_match": 0.6740812379110251, "eval_exec": 0.6576402321083172, "eval_loss": 0.39234504103660583, "eval_runtime": 785.534, "eval_samples_per_second": 1.316, "step": 896 }, { "epoch": 299.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 900 }, { "epoch": 301.29, "learning_rate": 0.0001, "loss": 0.0069, "step": 904 }, { "epoch": 302.58, "learning_rate": 0.0001, "loss": 0.0018, "step": 908 }, { "epoch": 303.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 912 }, { "epoch": 305.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 916 }, { "epoch": 306.58, "learning_rate": 0.0001, "loss": 0.0012, "step": 920 }, { "epoch": 307.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 924 }, { "epoch": 309.29, "learning_rate": 0.0001, "loss": 0.002, "step": 928 }, { "epoch": 310.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 932 }, { "epoch": 311.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 936 }, { "epoch": 313.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 940 }, { "epoch": 314.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 944 }, { "epoch": 315.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 948 }, { "epoch": 317.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 952 }, { "epoch": 318.58, "learning_rate": 0.0001, "loss": 0.0013, "step": 956 }, { "epoch": 319.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 960 }, { "epoch": 319.88, "eval_exact_match": 0.6740812379110251, "eval_exec": 0.655705996131528, "eval_loss": 0.40166109800338745, "eval_runtime": 799.99, "eval_samples_per_second": 1.293, "step": 960 }, { "epoch": 321.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 964 }, { "epoch": 322.58, "learning_rate": 0.0001, "loss": 0.0012, "step": 968 }, { "epoch": 323.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 972 }, { "epoch": 325.29, "learning_rate": 0.0001, "loss": 0.0021, "step": 976 }, { "epoch": 326.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 980 }, { "epoch": 327.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 984 }, { "epoch": 329.29, "learning_rate": 0.0001, "loss": 0.001, "step": 988 }, { "epoch": 330.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 992 }, { "epoch": 331.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 996 }, { "epoch": 333.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1000 }, { "epoch": 334.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 1004 }, { "epoch": 335.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1008 }, { "epoch": 337.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1012 }, { "epoch": 338.58, "learning_rate": 0.0001, "loss": 0.0015, "step": 1016 }, { "epoch": 339.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1020 }, { "epoch": 341.29, "learning_rate": 0.0001, "loss": 0.0098, "step": 1024 }, { "epoch": 341.29, "eval_exact_match": 0.6963249516441006, "eval_exec": 0.6721470019342359, "eval_loss": 0.3228262662887573, "eval_runtime": 823.0842, "eval_samples_per_second": 1.256, "step": 1024 }, { "epoch": 342.58, "learning_rate": 0.0001, "loss": 0.0016, "step": 1028 }, { "epoch": 343.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1032 }, { "epoch": 345.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1036 }, { "epoch": 346.58, "learning_rate": 0.0001, "loss": 0.0017, "step": 1040 }, { "epoch": 347.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1044 }, { "epoch": 349.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1048 }, { "epoch": 350.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1052 }, { "epoch": 351.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1056 }, { "epoch": 353.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 1060 }, { "epoch": 354.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 1064 }, { "epoch": 355.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1068 }, { "epoch": 357.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 1072 }, { "epoch": 358.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 1076 }, { "epoch": 359.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1080 }, { "epoch": 361.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1084 }, { "epoch": 362.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1088 }, { "epoch": 362.58, "eval_exact_match": 0.6924564796905223, "eval_exec": 0.6721470019342359, "eval_loss": 0.4092560410499573, "eval_runtime": 834.3948, "eval_samples_per_second": 1.239, "step": 1088 }, { "epoch": 363.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1092 }, { "epoch": 365.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1096 }, { "epoch": 366.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1100 }, { "epoch": 367.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1104 }, { "epoch": 369.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1108 }, { "epoch": 370.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1112 }, { "epoch": 371.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1116 }, { "epoch": 373.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1120 }, { "epoch": 374.58, "learning_rate": 0.0001, "loss": 0.0054, "step": 1124 }, { "epoch": 375.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1128 }, { "epoch": 377.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 1132 }, { "epoch": 378.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1136 }, { "epoch": 379.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1140 }, { "epoch": 381.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1144 }, { "epoch": 382.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1148 }, { "epoch": 383.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1152 }, { "epoch": 383.88, "eval_exact_match": 0.6934235976789168, "eval_exec": 0.6818181818181818, "eval_loss": 0.43188637495040894, "eval_runtime": 827.3576, "eval_samples_per_second": 1.25, "step": 1152 }, { "epoch": 385.29, "learning_rate": 0.0001, "loss": 0.0037, "step": 1156 }, { "epoch": 386.58, "learning_rate": 0.0001, "loss": 0.0021, "step": 1160 }, { "epoch": 387.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1164 }, { "epoch": 389.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1168 }, { "epoch": 390.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1172 }, { "epoch": 391.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1176 }, { "epoch": 393.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1180 }, { "epoch": 394.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1184 }, { "epoch": 395.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1188 }, { "epoch": 397.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1192 }, { "epoch": 398.58, "learning_rate": 0.0001, "loss": 0.001, "step": 1196 }, { "epoch": 399.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1200 }, { "epoch": 401.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1204 }, { "epoch": 402.58, "learning_rate": 0.0001, "loss": 0.0014, "step": 1208 }, { "epoch": 403.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1212 }, { "epoch": 405.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 1216 }, { "epoch": 405.29, "eval_exact_match": 0.6750483558994197, "eval_exec": 0.6624758220502901, "eval_loss": 0.38173577189445496, "eval_runtime": 863.9436, "eval_samples_per_second": 1.197, "step": 1216 }, { "epoch": 406.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1220 }, { "epoch": 407.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1224 }, { "epoch": 409.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 1228 }, { "epoch": 410.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1232 }, { "epoch": 411.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1236 }, { "epoch": 413.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1240 }, { "epoch": 414.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1244 }, { "epoch": 415.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1248 }, { "epoch": 417.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1252 }, { "epoch": 418.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1256 }, { "epoch": 419.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1260 }, { "epoch": 421.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 1264 }, { "epoch": 422.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1268 }, { "epoch": 423.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1272 }, { "epoch": 425.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1276 }, { "epoch": 426.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1280 }, { "epoch": 426.58, "eval_exact_match": 0.6808510638297872, "eval_exec": 0.6644100580270793, "eval_loss": 0.42931750416755676, "eval_runtime": 845.9587, "eval_samples_per_second": 1.222, "step": 1280 }, { "epoch": 427.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1284 }, { "epoch": 429.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1288 }, { "epoch": 430.58, "learning_rate": 0.0001, "loss": 0.0011, "step": 1292 }, { "epoch": 431.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1296 }, { "epoch": 433.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1300 }, { "epoch": 434.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1304 }, { "epoch": 435.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1308 }, { "epoch": 437.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1312 }, { "epoch": 438.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1316 }, { "epoch": 439.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1320 }, { "epoch": 441.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1324 }, { "epoch": 442.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1328 }, { "epoch": 443.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1332 }, { "epoch": 445.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1336 }, { "epoch": 446.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1340 }, { "epoch": 447.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1344 }, { "epoch": 447.88, "eval_exact_match": 0.6518375241779497, "eval_exec": 0.6499032882011605, "eval_loss": 0.416789174079895, "eval_runtime": 773.9784, "eval_samples_per_second": 1.336, "step": 1344 }, { "epoch": 449.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1348 }, { "epoch": 450.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1352 }, { "epoch": 451.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1356 }, { "epoch": 453.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1360 }, { "epoch": 454.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1364 }, { "epoch": 455.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1368 }, { "epoch": 457.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1372 }, { "epoch": 458.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1376 }, { "epoch": 459.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1380 }, { "epoch": 461.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1384 }, { "epoch": 462.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1388 }, { "epoch": 463.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1392 }, { "epoch": 465.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1396 }, { "epoch": 466.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1400 }, { "epoch": 467.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1404 }, { "epoch": 469.29, "learning_rate": 0.0001, "loss": 0.0022, "step": 1408 }, { "epoch": 469.29, "eval_exact_match": 0.6750483558994197, "eval_exec": 0.6673114119922631, "eval_loss": 0.36535608768463135, "eval_runtime": 954.0188, "eval_samples_per_second": 1.084, "step": 1408 }, { "epoch": 470.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1412 }, { "epoch": 471.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1416 }, { "epoch": 473.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1420 }, { "epoch": 474.58, "learning_rate": 0.0001, "loss": 0.001, "step": 1424 }, { "epoch": 475.88, "learning_rate": 0.0001, "loss": 0.008, "step": 1428 }, { "epoch": 477.29, "learning_rate": 0.0001, "loss": 0.0043, "step": 1432 }, { "epoch": 478.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1436 }, { "epoch": 479.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1440 }, { "epoch": 481.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1444 }, { "epoch": 482.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1448 }, { "epoch": 483.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1452 }, { "epoch": 485.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1456 }, { "epoch": 486.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1460 }, { "epoch": 487.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1464 }, { "epoch": 489.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1468 }, { "epoch": 490.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1472 }, { "epoch": 490.58, "eval_exact_match": 0.6818181818181818, "eval_exec": 0.6740812379110251, "eval_loss": 0.41243937611579895, "eval_runtime": 955.6213, "eval_samples_per_second": 1.082, "step": 1472 }, { "epoch": 491.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1476 }, { "epoch": 493.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1480 }, { "epoch": 494.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1484 }, { "epoch": 495.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1488 }, { "epoch": 497.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1492 }, { "epoch": 498.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1496 }, { "epoch": 499.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1500 }, { "epoch": 501.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1504 }, { "epoch": 502.58, "learning_rate": 0.0001, "loss": 0.0007, "step": 1508 }, { "epoch": 503.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1512 }, { "epoch": 505.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1516 }, { "epoch": 506.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1520 }, { "epoch": 507.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1524 }, { "epoch": 509.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 1528 }, { "epoch": 510.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1532 }, { "epoch": 511.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1536 }, { "epoch": 511.88, "eval_exact_match": 0.6856866537717602, "eval_exec": 0.6798839458413927, "eval_loss": 0.4265560507774353, "eval_runtime": 1299.537, "eval_samples_per_second": 0.796, "step": 1536 }, { "epoch": 513.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1540 }, { "epoch": 514.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1544 }, { "epoch": 515.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1548 }, { "epoch": 517.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1552 }, { "epoch": 518.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1556 }, { "epoch": 519.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1560 }, { "epoch": 521.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1564 }, { "epoch": 522.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1568 }, { "epoch": 523.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1572 }, { "epoch": 525.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1576 }, { "epoch": 526.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1580 }, { "epoch": 527.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1584 }, { "epoch": 529.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1588 }, { "epoch": 530.58, "learning_rate": 0.0001, "loss": 0.0014, "step": 1592 }, { "epoch": 531.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1596 }, { "epoch": 533.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1600 }, { "epoch": 533.29, "eval_exact_match": 0.6856866537717602, "eval_exec": 0.6779497098646035, "eval_loss": 0.4029601216316223, "eval_runtime": 999.4683, "eval_samples_per_second": 1.035, "step": 1600 }, { "epoch": 534.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1604 }, { "epoch": 535.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1608 }, { "epoch": 537.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1612 }, { "epoch": 538.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1616 }, { "epoch": 539.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1620 }, { "epoch": 541.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1624 }, { "epoch": 542.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1628 }, { "epoch": 543.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1632 }, { "epoch": 545.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 1636 }, { "epoch": 546.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1640 }, { "epoch": 547.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1644 }, { "epoch": 549.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1648 }, { "epoch": 550.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1652 }, { "epoch": 551.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1656 }, { "epoch": 553.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1660 }, { "epoch": 554.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1664 }, { "epoch": 554.58, "eval_exact_match": 0.6934235976789168, "eval_exec": 0.6827852998065764, "eval_loss": 0.44639065861701965, "eval_runtime": 1031.7773, "eval_samples_per_second": 1.002, "step": 1664 }, { "epoch": 555.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1668 }, { "epoch": 557.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1672 }, { "epoch": 558.58, "learning_rate": 0.0001, "loss": 0.0025, "step": 1676 }, { "epoch": 559.88, "learning_rate": 0.0001, "loss": 0.0029, "step": 1680 }, { "epoch": 561.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1684 }, { "epoch": 562.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1688 }, { "epoch": 563.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1692 }, { "epoch": 565.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1696 }, { "epoch": 566.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1700 }, { "epoch": 567.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1704 }, { "epoch": 569.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1708 }, { "epoch": 570.58, "learning_rate": 0.0001, "loss": 0.0018, "step": 1712 }, { "epoch": 571.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1716 }, { "epoch": 573.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1720 }, { "epoch": 574.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 1724 }, { "epoch": 575.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1728 }, { "epoch": 575.88, "eval_exact_match": 0.6876208897485493, "eval_exec": 0.6760154738878144, "eval_loss": 0.41115066409111023, "eval_runtime": 1007.2726, "eval_samples_per_second": 1.027, "step": 1728 }, { "epoch": 577.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1732 }, { "epoch": 578.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1736 }, { "epoch": 579.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1740 }, { "epoch": 581.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1744 }, { "epoch": 582.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1748 }, { "epoch": 583.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1752 }, { "epoch": 585.29, "learning_rate": 0.0001, "loss": 0.0029, "step": 1756 }, { "epoch": 586.58, "learning_rate": 0.0001, "loss": 0.0021, "step": 1760 }, { "epoch": 587.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1764 }, { "epoch": 589.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1768 }, { "epoch": 590.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1772 }, { "epoch": 591.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1776 }, { "epoch": 593.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1780 }, { "epoch": 594.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1784 }, { "epoch": 595.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1788 }, { "epoch": 597.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1792 }, { "epoch": 597.29, "eval_exact_match": 0.6943907156673114, "eval_exec": 0.6779497098646035, "eval_loss": 0.4605121910572052, "eval_runtime": 1298.121, "eval_samples_per_second": 0.797, "step": 1792 }, { "epoch": 598.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1796 }, { "epoch": 599.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1800 }, { "epoch": 601.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1804 }, { "epoch": 602.58, "learning_rate": 0.0001, "loss": 0.0008, "step": 1808 }, { "epoch": 603.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1812 }, { "epoch": 605.29, "learning_rate": 0.0001, "loss": 0.0041, "step": 1816 }, { "epoch": 606.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1820 }, { "epoch": 607.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 1824 }, { "epoch": 609.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1828 }, { "epoch": 610.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1832 }, { "epoch": 611.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1836 }, { "epoch": 613.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 1840 }, { "epoch": 614.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1844 }, { "epoch": 615.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 1848 }, { "epoch": 617.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1852 }, { "epoch": 618.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1856 }, { "epoch": 618.58, "eval_exact_match": 0.6982591876208898, "eval_exec": 0.683752417794971, "eval_loss": 0.4248420000076294, "eval_runtime": 1371.1332, "eval_samples_per_second": 0.754, "step": 1856 }, { "epoch": 619.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1860 }, { "epoch": 621.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1864 }, { "epoch": 622.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1868 }, { "epoch": 623.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1872 }, { "epoch": 625.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1876 }, { "epoch": 626.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1880 }, { "epoch": 627.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1884 }, { "epoch": 629.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 1888 }, { "epoch": 630.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1892 }, { "epoch": 631.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1896 }, { "epoch": 633.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1900 }, { "epoch": 634.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1904 }, { "epoch": 635.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1908 }, { "epoch": 637.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1912 }, { "epoch": 638.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1916 }, { "epoch": 639.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1920 }, { "epoch": 639.88, "eval_exact_match": 0.6847195357833655, "eval_exec": 0.6769825918762089, "eval_loss": 0.45460787415504456, "eval_runtime": 1038.9518, "eval_samples_per_second": 0.995, "step": 1920 }, { "epoch": 641.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1924 }, { "epoch": 642.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1928 }, { "epoch": 643.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1932 }, { "epoch": 645.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1936 }, { "epoch": 646.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 1940 }, { "epoch": 647.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 1944 }, { "epoch": 649.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1948 }, { "epoch": 650.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 1952 }, { "epoch": 651.88, "learning_rate": 0.0001, "loss": 0.0005, "step": 1956 }, { "epoch": 653.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 1960 }, { "epoch": 654.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1964 }, { "epoch": 655.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1968 }, { "epoch": 657.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 1972 }, { "epoch": 658.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 1976 }, { "epoch": 659.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 1980 }, { "epoch": 661.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 1984 }, { "epoch": 661.29, "eval_exact_match": 0.6856866537717602, "eval_exec": 0.6808510638297872, "eval_loss": 0.4657697379589081, "eval_runtime": 1097.6691, "eval_samples_per_second": 0.942, "step": 1984 }, { "epoch": 662.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 1988 }, { "epoch": 663.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 1992 }, { "epoch": 665.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 1996 }, { "epoch": 666.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2000 }, { "epoch": 667.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2004 }, { "epoch": 669.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2008 }, { "epoch": 670.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2012 }, { "epoch": 671.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2016 }, { "epoch": 673.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2020 }, { "epoch": 674.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 2024 }, { "epoch": 675.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2028 }, { "epoch": 677.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2032 }, { "epoch": 678.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 2036 }, { "epoch": 679.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2040 }, { "epoch": 681.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2044 }, { "epoch": 682.58, "learning_rate": 0.0001, "loss": 0.0037, "step": 2048 }, { "epoch": 682.58, "eval_exact_match": 0.6914893617021277, "eval_exec": 0.690522243713733, "eval_loss": 0.4180769622325897, "eval_runtime": 1092.62, "eval_samples_per_second": 0.946, "step": 2048 }, { "epoch": 683.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2052 }, { "epoch": 685.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2056 }, { "epoch": 686.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 2060 }, { "epoch": 687.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 2064 }, { "epoch": 689.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 2068 }, { "epoch": 690.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 2072 }, { "epoch": 691.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2076 }, { "epoch": 693.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2080 }, { "epoch": 694.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 2084 }, { "epoch": 695.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2088 }, { "epoch": 697.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2092 }, { "epoch": 698.58, "learning_rate": 0.0001, "loss": 0.0012, "step": 2096 }, { "epoch": 699.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 2100 }, { "epoch": 701.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2104 }, { "epoch": 702.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 2108 }, { "epoch": 703.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2112 }, { "epoch": 703.88, "eval_exact_match": 0.6943907156673114, "eval_exec": 0.6924564796905223, "eval_loss": 0.44465285539627075, "eval_runtime": 999.8273, "eval_samples_per_second": 1.034, "step": 2112 }, { "epoch": 705.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 2116 }, { "epoch": 706.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 2120 }, { "epoch": 707.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2124 }, { "epoch": 709.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2128 }, { "epoch": 710.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 2132 }, { "epoch": 711.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2136 }, { "epoch": 713.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 2140 }, { "epoch": 714.58, "learning_rate": 0.0001, "loss": 0.0053, "step": 2144 }, { "epoch": 715.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 2148 }, { "epoch": 717.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2152 }, { "epoch": 718.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2156 }, { "epoch": 719.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2160 }, { "epoch": 721.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2164 }, { "epoch": 722.58, "learning_rate": 0.0001, "loss": 0.0013, "step": 2168 }, { "epoch": 723.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 2172 }, { "epoch": 725.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2176 }, { "epoch": 725.29, "eval_exact_match": 0.6992263056092843, "eval_exec": 0.6876208897485493, "eval_loss": 0.4321630299091339, "eval_runtime": 878.8579, "eval_samples_per_second": 1.177, "step": 2176 }, { "epoch": 726.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2180 }, { "epoch": 727.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2184 }, { "epoch": 729.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2188 }, { "epoch": 730.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2192 }, { "epoch": 731.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2196 }, { "epoch": 733.29, "learning_rate": 0.0001, "loss": 0.0007, "step": 2200 }, { "epoch": 734.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2204 }, { "epoch": 735.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2208 }, { "epoch": 737.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2212 }, { "epoch": 738.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2216 }, { "epoch": 739.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2220 }, { "epoch": 741.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2224 }, { "epoch": 742.58, "learning_rate": 0.0001, "loss": 0.0003, "step": 2228 }, { "epoch": 743.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2232 }, { "epoch": 745.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2236 }, { "epoch": 746.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2240 }, { "epoch": 746.58, "eval_exact_match": 0.6963249516441006, "eval_exec": 0.6789168278529981, "eval_loss": 0.45437225699424744, "eval_runtime": 864.1019, "eval_samples_per_second": 1.197, "step": 2240 }, { "epoch": 747.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2244 }, { "epoch": 749.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2248 }, { "epoch": 750.58, "learning_rate": 0.0001, "loss": 0.0006, "step": 2252 }, { "epoch": 751.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2256 }, { "epoch": 753.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2260 }, { "epoch": 754.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2264 }, { "epoch": 755.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2268 }, { "epoch": 757.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2272 }, { "epoch": 758.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2276 }, { "epoch": 759.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2280 }, { "epoch": 761.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2284 }, { "epoch": 762.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2288 }, { "epoch": 763.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2292 }, { "epoch": 765.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2296 }, { "epoch": 766.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2300 }, { "epoch": 767.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2304 }, { "epoch": 767.88, "eval_exact_match": 0.6963249516441006, "eval_exec": 0.6866537717601547, "eval_loss": 0.4832020401954651, "eval_runtime": 816.387, "eval_samples_per_second": 1.267, "step": 2304 }, { "epoch": 769.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2308 }, { "epoch": 770.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 2312 }, { "epoch": 771.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2316 }, { "epoch": 773.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 2320 }, { "epoch": 774.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2324 }, { "epoch": 775.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 2328 }, { "epoch": 777.29, "learning_rate": 0.0001, "loss": 0.0211, "step": 2332 }, { "epoch": 778.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2336 }, { "epoch": 779.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2340 }, { "epoch": 781.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2344 }, { "epoch": 782.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2348 }, { "epoch": 783.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2352 }, { "epoch": 785.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 2356 }, { "epoch": 786.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2360 }, { "epoch": 787.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2364 }, { "epoch": 789.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2368 }, { "epoch": 789.29, "eval_exact_match": 0.6924564796905223, "eval_exec": 0.6779497098646035, "eval_loss": 0.47273918986320496, "eval_runtime": 826.2169, "eval_samples_per_second": 1.251, "step": 2368 }, { "epoch": 790.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2372 }, { "epoch": 791.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2376 }, { "epoch": 793.29, "learning_rate": 0.0001, "loss": 0.0004, "step": 2380 }, { "epoch": 794.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2384 }, { "epoch": 795.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2388 }, { "epoch": 797.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2392 }, { "epoch": 798.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2396 }, { "epoch": 799.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2400 }, { "epoch": 801.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2404 }, { "epoch": 802.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2408 }, { "epoch": 803.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2412 }, { "epoch": 805.29, "learning_rate": 0.0001, "loss": 0.0, "step": 2416 }, { "epoch": 806.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2420 }, { "epoch": 807.88, "learning_rate": 0.0001, "loss": 0.0, "step": 2424 }, { "epoch": 809.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2428 }, { "epoch": 810.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2432 }, { "epoch": 810.58, "eval_exact_match": 0.6808510638297872, "eval_exec": 0.6692456479690522, "eval_loss": 0.48608434200286865, "eval_runtime": 825.1274, "eval_samples_per_second": 1.253, "step": 2432 }, { "epoch": 811.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2436 }, { "epoch": 813.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2440 }, { "epoch": 814.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2444 }, { "epoch": 815.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2448 }, { "epoch": 817.29, "learning_rate": 0.0001, "loss": 0.0006, "step": 2452 }, { "epoch": 818.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2456 }, { "epoch": 819.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2460 }, { "epoch": 821.29, "learning_rate": 0.0001, "loss": 0.0003, "step": 2464 }, { "epoch": 822.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 2468 }, { "epoch": 823.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2472 }, { "epoch": 825.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2476 }, { "epoch": 826.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2480 }, { "epoch": 827.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2484 }, { "epoch": 829.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2488 }, { "epoch": 830.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 2492 }, { "epoch": 831.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2496 }, { "epoch": 831.88, "eval_exact_match": 0.6934235976789168, "eval_exec": 0.6750483558994197, "eval_loss": 0.47877049446105957, "eval_runtime": 803.416, "eval_samples_per_second": 1.287, "step": 2496 }, { "epoch": 833.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2500 }, { "epoch": 834.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2504 }, { "epoch": 835.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2508 }, { "epoch": 837.29, "learning_rate": 0.0001, "loss": 0.0005, "step": 2512 }, { "epoch": 838.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2516 }, { "epoch": 839.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2520 }, { "epoch": 841.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2524 }, { "epoch": 842.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2528 }, { "epoch": 843.88, "learning_rate": 0.0001, "loss": 0.0006, "step": 2532 }, { "epoch": 845.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2536 }, { "epoch": 846.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2540 }, { "epoch": 847.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2544 }, { "epoch": 849.29, "learning_rate": 0.0001, "loss": 0.0002, "step": 2548 }, { "epoch": 850.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2552 }, { "epoch": 851.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2556 }, { "epoch": 853.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2560 }, { "epoch": 853.29, "eval_exact_match": 0.6914893617021277, "eval_exec": 0.6779497098646035, "eval_loss": 0.4677638113498688, "eval_runtime": 845.5605, "eval_samples_per_second": 1.223, "step": 2560 }, { "epoch": 854.58, "learning_rate": 0.0001, "loss": 0.0005, "step": 2564 }, { "epoch": 855.88, "learning_rate": 0.0001, "loss": 0.0004, "step": 2568 }, { "epoch": 857.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 2572 }, { "epoch": 858.58, "learning_rate": 0.0001, "loss": 0.0004, "step": 2576 }, { "epoch": 859.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2580 }, { "epoch": 861.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2584 }, { "epoch": 862.58, "learning_rate": 0.0001, "loss": 0.0002, "step": 2588 }, { "epoch": 863.88, "learning_rate": 0.0001, "loss": 0.0002, "step": 2592 }, { "epoch": 865.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2596 }, { "epoch": 866.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2600 }, { "epoch": 867.88, "learning_rate": 0.0001, "loss": 0.0003, "step": 2604 }, { "epoch": 869.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2608 }, { "epoch": 870.58, "learning_rate": 0.0001, "loss": 0.0001, "step": 2612 }, { "epoch": 871.88, "learning_rate": 0.0001, "loss": 0.0001, "step": 2616 }, { "epoch": 873.29, "learning_rate": 0.0001, "loss": 0.0001, "step": 2620 }, { "epoch": 874.58, "learning_rate": 0.0001, "loss": 0.0, "step": 2624 }, { "epoch": 874.58, "eval_exact_match": 0.7117988394584139, "eval_exec": 0.695357833655706, "eval_loss": 0.487076997756958, "eval_runtime": 1063.4572, "eval_samples_per_second": 0.972, "step": 2624 } ], "max_steps": 9216, "num_train_epochs": 3072, "total_flos": 1.20676205882696e+19, "trial_name": null, "trial_params": null }