{ "best_metric": 0.6417290108063175, "best_model_checkpoint": "./experiment/train_0429_sparc_add_coref/checkpoint-2432", "epoch": 270.2147651006711, "global_step": 2432, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0001, "loss": 3.5405, "step": 1 }, { "epoch": 0.86, "learning_rate": 0.0001, "loss": 2.3105, "step": 8 }, { "epoch": 1.75, "learning_rate": 0.0001, "loss": 1.1917, "step": 16 }, { "epoch": 2.64, "learning_rate": 0.0001, "loss": 0.57, "step": 24 }, { "epoch": 3.54, "learning_rate": 0.0001, "loss": 0.3591, "step": 32 }, { "epoch": 4.43, "learning_rate": 0.0001, "loss": 0.2618, "step": 40 }, { "epoch": 5.32, "learning_rate": 0.0001, "loss": 0.2122, "step": 48 }, { "epoch": 6.21, "learning_rate": 0.0001, "loss": 0.1584, "step": 56 }, { "epoch": 7.11, "learning_rate": 0.0001, "loss": 0.1421, "step": 64 }, { "epoch": 7.11, "eval_exact_match": 0.39068994181213634, "eval_exec": 0.457190357439734, "eval_loss": 0.1790621280670166, "eval_runtime": 891.1645, "eval_samples_per_second": 1.823, "step": 64 }, { "epoch": 7.97, "learning_rate": 0.0001, "loss": 0.1129, "step": 72 }, { "epoch": 8.86, "learning_rate": 0.0001, "loss": 0.1136, "step": 80 }, { "epoch": 9.75, "learning_rate": 0.0001, "loss": 0.102, "step": 88 }, { "epoch": 10.64, "learning_rate": 0.0001, "loss": 0.0846, "step": 96 }, { "epoch": 11.54, "learning_rate": 0.0001, "loss": 0.0752, "step": 104 }, { "epoch": 12.43, "learning_rate": 0.0001, "loss": 0.0681, "step": 112 }, { "epoch": 13.32, "learning_rate": 0.0001, "loss": 0.0655, "step": 120 }, { "epoch": 14.21, "learning_rate": 0.0001, "loss": 0.0551, "step": 128 }, { "epoch": 14.21, "eval_exact_match": 0.5586034912718204, "eval_exec": 0.6226101413133832, "eval_loss": 0.16853797435760498, "eval_runtime": 623.0506, "eval_samples_per_second": 2.608, "step": 128 }, { "epoch": 15.11, "learning_rate": 0.0001, "loss": 0.0499, "step": 136 }, { "epoch": 15.97, "learning_rate": 0.0001, "loss": 0.0572, "step": 144 }, { "epoch": 16.86, "learning_rate": 0.0001, "loss": 0.0459, "step": 152 }, { "epoch": 17.75, "learning_rate": 0.0001, "loss": 0.0386, "step": 160 }, { "epoch": 18.64, "learning_rate": 0.0001, "loss": 0.0342, "step": 168 }, { "epoch": 19.54, "learning_rate": 0.0001, "loss": 0.0315, "step": 176 }, { "epoch": 20.43, "learning_rate": 0.0001, "loss": 0.0282, "step": 184 }, { "epoch": 21.32, "learning_rate": 0.0001, "loss": 0.027, "step": 192 }, { "epoch": 21.32, "eval_exact_match": 0.5810473815461347, "eval_exec": 0.6342477140482128, "eval_loss": 0.21147404611110687, "eval_runtime": 655.5734, "eval_samples_per_second": 2.479, "step": 192 }, { "epoch": 22.21, "learning_rate": 0.0001, "loss": 0.0233, "step": 200 }, { "epoch": 23.11, "learning_rate": 0.0001, "loss": 0.0231, "step": 208 }, { "epoch": 23.97, "learning_rate": 0.0001, "loss": 0.0431, "step": 216 }, { "epoch": 24.86, "learning_rate": 0.0001, "loss": 0.0212, "step": 224 }, { "epoch": 25.75, "learning_rate": 0.0001, "loss": 0.018, "step": 232 }, { "epoch": 26.64, "learning_rate": 0.0001, "loss": 0.0153, "step": 240 }, { "epoch": 27.54, "learning_rate": 0.0001, "loss": 0.0144, "step": 248 }, { "epoch": 28.43, "learning_rate": 0.0001, "loss": 0.0156, "step": 256 }, { "epoch": 28.43, "eval_exact_match": 0.5818786367414797, "eval_exec": 0.6408977556109726, "eval_loss": 0.23599478602409363, "eval_runtime": 661.7445, "eval_samples_per_second": 2.456, "step": 256 }, { "epoch": 29.32, "learning_rate": 0.0001, "loss": 0.0141, "step": 264 }, { "epoch": 30.21, "learning_rate": 0.0001, "loss": 0.012, "step": 272 }, { "epoch": 31.11, "learning_rate": 0.0001, "loss": 0.0115, "step": 280 }, { "epoch": 31.97, "learning_rate": 0.0001, "loss": 0.0113, "step": 288 }, { "epoch": 32.86, "learning_rate": 0.0001, "loss": 0.0107, "step": 296 }, { "epoch": 33.75, "learning_rate": 0.0001, "loss": 0.0096, "step": 304 }, { "epoch": 34.64, "learning_rate": 0.0001, "loss": 0.01, "step": 312 }, { "epoch": 35.54, "learning_rate": 0.0001, "loss": 0.0084, "step": 320 }, { "epoch": 35.54, "eval_exact_match": 0.5985037406483791, "eval_exec": 0.655860349127182, "eval_loss": 0.26971787214279175, "eval_runtime": 691.9565, "eval_samples_per_second": 2.348, "step": 320 }, { "epoch": 36.43, "learning_rate": 0.0001, "loss": 0.0084, "step": 328 }, { "epoch": 37.32, "learning_rate": 0.0001, "loss": 0.0098, "step": 336 }, { "epoch": 38.21, "learning_rate": 0.0001, "loss": 0.0206, "step": 344 }, { "epoch": 39.11, "learning_rate": 0.0001, "loss": 0.0071, "step": 352 }, { "epoch": 39.97, "learning_rate": 0.0001, "loss": 0.0061, "step": 360 }, { "epoch": 40.86, "learning_rate": 0.0001, "loss": 0.0061, "step": 368 }, { "epoch": 41.75, "learning_rate": 0.0001, "loss": 0.0059, "step": 376 }, { "epoch": 42.64, "learning_rate": 0.0001, "loss": 0.0099, "step": 384 }, { "epoch": 42.64, "eval_exact_match": 0.5901911886949294, "eval_exec": 0.6367414796342478, "eval_loss": 0.2628422975540161, "eval_runtime": 655.666, "eval_samples_per_second": 2.478, "step": 384 }, { "epoch": 43.54, "learning_rate": 0.0001, "loss": 0.0055, "step": 392 }, { "epoch": 44.43, "learning_rate": 0.0001, "loss": 0.0052, "step": 400 }, { "epoch": 45.32, "learning_rate": 0.0001, "loss": 0.0053, "step": 408 }, { "epoch": 46.21, "learning_rate": 0.0001, "loss": 0.005, "step": 416 }, { "epoch": 47.11, "learning_rate": 0.0001, "loss": 0.0048, "step": 424 }, { "epoch": 47.97, "learning_rate": 0.0001, "loss": 0.0042, "step": 432 }, { "epoch": 48.86, "learning_rate": 0.0001, "loss": 0.0049, "step": 440 }, { "epoch": 49.75, "learning_rate": 0.0001, "loss": 0.0039, "step": 448 }, { "epoch": 49.75, "eval_exact_match": 0.5985037406483791, "eval_exec": 0.6517040731504572, "eval_loss": 0.3072524070739746, "eval_runtime": 692.7017, "eval_samples_per_second": 2.346, "step": 448 }, { "epoch": 50.64, "learning_rate": 0.0001, "loss": 0.0047, "step": 456 }, { "epoch": 51.54, "learning_rate": 0.0001, "loss": 0.0039, "step": 464 }, { "epoch": 52.43, "learning_rate": 0.0001, "loss": 0.0033, "step": 472 }, { "epoch": 53.32, "learning_rate": 0.0001, "loss": 0.0037, "step": 480 }, { "epoch": 54.21, "learning_rate": 0.0001, "loss": 0.0034, "step": 488 }, { "epoch": 55.11, "learning_rate": 0.0001, "loss": 0.0038, "step": 496 }, { "epoch": 55.97, "learning_rate": 0.0001, "loss": 0.003, "step": 504 }, { "epoch": 56.86, "learning_rate": 0.0001, "loss": 0.0033, "step": 512 }, { "epoch": 56.86, "eval_exact_match": 0.5993349958437241, "eval_exec": 0.6492103075644223, "eval_loss": 0.3246177136898041, "eval_runtime": 716.2484, "eval_samples_per_second": 2.269, "step": 512 }, { "epoch": 57.75, "learning_rate": 0.0001, "loss": 0.0033, "step": 520 }, { "epoch": 58.64, "learning_rate": 0.0001, "loss": 0.0038, "step": 528 }, { "epoch": 59.54, "learning_rate": 0.0001, "loss": 0.0031, "step": 536 }, { "epoch": 60.43, "learning_rate": 0.0001, "loss": 0.0032, "step": 544 }, { "epoch": 61.32, "learning_rate": 0.0001, "loss": 0.004, "step": 552 }, { "epoch": 62.21, "learning_rate": 0.0001, "loss": 0.0033, "step": 560 }, { "epoch": 63.11, "learning_rate": 0.0001, "loss": 0.0145, "step": 568 }, { "epoch": 63.97, "learning_rate": 0.0001, "loss": 0.0028, "step": 576 }, { "epoch": 63.97, "eval_exact_match": 0.5935162094763092, "eval_exec": 0.6433915211970075, "eval_loss": 0.31884506344795227, "eval_runtime": 835.1344, "eval_samples_per_second": 1.946, "step": 576 }, { "epoch": 64.86, "learning_rate": 0.0001, "loss": 0.0028, "step": 584 }, { "epoch": 65.75, "learning_rate": 0.0001, "loss": 0.0024, "step": 592 }, { "epoch": 66.64, "learning_rate": 0.0001, "loss": 0.0024, "step": 600 }, { "epoch": 67.54, "learning_rate": 0.0001, "loss": 0.0026, "step": 608 }, { "epoch": 68.43, "learning_rate": 0.0001, "loss": 0.0026, "step": 616 }, { "epoch": 69.32, "learning_rate": 0.0001, "loss": 0.0022, "step": 624 }, { "epoch": 70.21, "learning_rate": 0.0001, "loss": 0.0024, "step": 632 }, { "epoch": 71.11, "learning_rate": 0.0001, "loss": 0.0021, "step": 640 }, { "epoch": 71.11, "eval_exact_match": 0.6018287614297589, "eval_exec": 0.6583541147132169, "eval_loss": 0.3463212847709656, "eval_runtime": 682.2964, "eval_samples_per_second": 2.382, "step": 640 }, { "epoch": 71.97, "learning_rate": 0.0001, "loss": 0.0022, "step": 648 }, { "epoch": 72.86, "learning_rate": 0.0001, "loss": 0.002, "step": 656 }, { "epoch": 73.75, "learning_rate": 0.0001, "loss": 0.0022, "step": 664 }, { "epoch": 74.64, "learning_rate": 0.0001, "loss": 0.002, "step": 672 }, { "epoch": 75.54, "learning_rate": 0.0001, "loss": 0.002, "step": 680 }, { "epoch": 76.43, "learning_rate": 0.0001, "loss": 0.002, "step": 688 }, { "epoch": 77.32, "learning_rate": 0.0001, "loss": 0.002, "step": 696 }, { "epoch": 78.21, "learning_rate": 0.0001, "loss": 0.0022, "step": 704 }, { "epoch": 78.21, "eval_exact_match": 0.6043225270157938, "eval_exec": 0.6583541147132169, "eval_loss": 0.34072062373161316, "eval_runtime": 645.8928, "eval_samples_per_second": 2.516, "step": 704 }, { "epoch": 79.11, "learning_rate": 0.0001, "loss": 0.0025, "step": 712 }, { "epoch": 79.97, "learning_rate": 0.0001, "loss": 0.0021, "step": 720 }, { "epoch": 80.86, "learning_rate": 0.0001, "loss": 0.0028, "step": 728 }, { "epoch": 81.75, "learning_rate": 0.0001, "loss": 0.0199, "step": 736 }, { "epoch": 82.64, "learning_rate": 0.0001, "loss": 0.0017, "step": 744 }, { "epoch": 83.54, "learning_rate": 0.0001, "loss": 0.0019, "step": 752 }, { "epoch": 84.43, "learning_rate": 0.0001, "loss": 0.0015, "step": 760 }, { "epoch": 85.32, "learning_rate": 0.0001, "loss": 0.002, "step": 768 }, { "epoch": 85.32, "eval_exact_match": 0.5968412302576891, "eval_exec": 0.6517040731504572, "eval_loss": 0.34500738978385925, "eval_runtime": 667.4413, "eval_samples_per_second": 2.435, "step": 768 }, { "epoch": 86.21, "learning_rate": 0.0001, "loss": 0.0016, "step": 776 }, { "epoch": 87.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 784 }, { "epoch": 87.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 792 }, { "epoch": 88.86, "learning_rate": 0.0001, "loss": 0.0016, "step": 800 }, { "epoch": 89.75, "learning_rate": 0.0001, "loss": 0.0016, "step": 808 }, { "epoch": 90.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 816 }, { "epoch": 91.54, "learning_rate": 0.0001, "loss": 0.0016, "step": 824 }, { "epoch": 92.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 832 }, { "epoch": 92.43, "eval_exact_match": 0.5985037406483791, "eval_exec": 0.656691604322527, "eval_loss": 0.35779085755348206, "eval_runtime": 620.7719, "eval_samples_per_second": 2.618, "step": 832 }, { "epoch": 93.32, "learning_rate": 0.0001, "loss": 0.0014, "step": 840 }, { "epoch": 94.21, "learning_rate": 0.0001, "loss": 0.0013, "step": 848 }, { "epoch": 95.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 856 }, { "epoch": 95.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 864 }, { "epoch": 96.86, "learning_rate": 0.0001, "loss": 0.0014, "step": 872 }, { "epoch": 97.75, "learning_rate": 0.0001, "loss": 0.0015, "step": 880 }, { "epoch": 98.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 888 }, { "epoch": 99.54, "learning_rate": 0.0001, "loss": 0.0012, "step": 896 }, { "epoch": 99.54, "eval_exact_match": 0.6051537822111388, "eval_exec": 0.6508728179551122, "eval_loss": 0.37539783120155334, "eval_runtime": 640.8712, "eval_samples_per_second": 2.536, "step": 896 }, { "epoch": 100.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 904 }, { "epoch": 101.32, "learning_rate": 0.0001, "loss": 0.0021, "step": 912 }, { "epoch": 102.21, "learning_rate": 0.0001, "loss": 0.0013, "step": 920 }, { "epoch": 103.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 928 }, { "epoch": 103.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 936 }, { "epoch": 104.86, "learning_rate": 0.0001, "loss": 0.0014, "step": 944 }, { "epoch": 105.75, "learning_rate": 0.0001, "loss": 0.0023, "step": 952 }, { "epoch": 106.64, "learning_rate": 0.0001, "loss": 0.0014, "step": 960 }, { "epoch": 106.64, "eval_exact_match": 0.6101413133832086, "eval_exec": 0.6550290939318371, "eval_loss": 0.37180858850479126, "eval_runtime": 660.4887, "eval_samples_per_second": 2.46, "step": 960 }, { "epoch": 107.54, "learning_rate": 0.0001, "loss": 0.0025, "step": 968 }, { "epoch": 108.43, "learning_rate": 0.0001, "loss": 0.0022, "step": 976 }, { "epoch": 109.32, "learning_rate": 0.0001, "loss": 0.0014, "step": 984 }, { "epoch": 110.21, "learning_rate": 0.0001, "loss": 0.0014, "step": 992 }, { "epoch": 111.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 1000 }, { "epoch": 111.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1008 }, { "epoch": 112.86, "learning_rate": 0.0001, "loss": 0.0013, "step": 1016 }, { "epoch": 113.75, "learning_rate": 0.0001, "loss": 0.0017, "step": 1024 }, { "epoch": 113.75, "eval_exact_match": 0.5943474646716542, "eval_exec": 0.6458852867830424, "eval_loss": 0.37901195883750916, "eval_runtime": 727.9232, "eval_samples_per_second": 2.232, "step": 1024 }, { "epoch": 114.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 1032 }, { "epoch": 115.54, "learning_rate": 0.0001, "loss": 0.0013, "step": 1040 }, { "epoch": 116.43, "learning_rate": 0.0001, "loss": 0.0016, "step": 1048 }, { "epoch": 117.32, "learning_rate": 0.0001, "loss": 0.0014, "step": 1056 }, { "epoch": 118.21, "learning_rate": 0.0001, "loss": 0.0014, "step": 1064 }, { "epoch": 119.11, "learning_rate": 0.0001, "loss": 0.0016, "step": 1072 }, { "epoch": 119.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1080 }, { "epoch": 120.86, "learning_rate": 0.0001, "loss": 0.0017, "step": 1088 }, { "epoch": 120.86, "eval_exact_match": 0.6126350789692435, "eval_exec": 0.6600166251039069, "eval_loss": 0.3471900224685669, "eval_runtime": 623.9447, "eval_samples_per_second": 2.604, "step": 1088 }, { "epoch": 121.75, "learning_rate": 0.0001, "loss": 0.0013, "step": 1096 }, { "epoch": 122.64, "learning_rate": 0.0001, "loss": 0.0141, "step": 1104 }, { "epoch": 123.54, "learning_rate": 0.0001, "loss": 0.0032, "step": 1112 }, { "epoch": 124.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 1120 }, { "epoch": 125.32, "learning_rate": 0.0001, "loss": 0.0012, "step": 1128 }, { "epoch": 126.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1136 }, { "epoch": 127.11, "learning_rate": 0.0001, "loss": 0.0061, "step": 1144 }, { "epoch": 127.97, "learning_rate": 0.0001, "loss": 0.0017, "step": 1152 }, { "epoch": 127.97, "eval_exact_match": 0.6126350789692435, "eval_exec": 0.6517040731504572, "eval_loss": 0.3467860519886017, "eval_runtime": 618.228, "eval_samples_per_second": 2.628, "step": 1152 }, { "epoch": 128.86, "learning_rate": 0.0001, "loss": 0.011, "step": 1160 }, { "epoch": 129.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1168 }, { "epoch": 130.64, "learning_rate": 0.0001, "loss": 0.0014, "step": 1176 }, { "epoch": 131.54, "learning_rate": 0.0001, "loss": 0.0013, "step": 1184 }, { "epoch": 132.43, "learning_rate": 0.0001, "loss": 0.0018, "step": 1192 }, { "epoch": 133.32, "learning_rate": 0.0001, "loss": 0.0053, "step": 1200 }, { "epoch": 134.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1208 }, { "epoch": 135.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 1216 }, { "epoch": 135.11, "eval_exact_match": 0.6201163757273483, "eval_exec": 0.6583541147132169, "eval_loss": 0.3528030216693878, "eval_runtime": 653.8235, "eval_samples_per_second": 2.485, "step": 1216 }, { "epoch": 135.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1224 }, { "epoch": 136.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1232 }, { "epoch": 137.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1240 }, { "epoch": 138.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1248 }, { "epoch": 139.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1256 }, { "epoch": 140.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1264 }, { "epoch": 141.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1272 }, { "epoch": 142.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1280 }, { "epoch": 142.21, "eval_exact_match": 0.6201163757273483, "eval_exec": 0.6625103906899418, "eval_loss": 0.38731279969215393, "eval_runtime": 657.4977, "eval_samples_per_second": 2.471, "step": 1280 }, { "epoch": 143.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1288 }, { "epoch": 143.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1296 }, { "epoch": 144.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1304 }, { "epoch": 145.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1312 }, { "epoch": 146.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1320 }, { "epoch": 147.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1328 }, { "epoch": 148.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1336 }, { "epoch": 149.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1344 }, { "epoch": 149.32, "eval_exact_match": 0.6118038237738986, "eval_exec": 0.6541978387364921, "eval_loss": 0.38289570808410645, "eval_runtime": 654.6029, "eval_samples_per_second": 2.482, "step": 1344 }, { "epoch": 150.21, "learning_rate": 0.0001, "loss": 0.0018, "step": 1352 }, { "epoch": 151.11, "learning_rate": 0.0001, "loss": 0.0027, "step": 1360 }, { "epoch": 151.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1368 }, { "epoch": 152.86, "learning_rate": 0.0001, "loss": 0.0021, "step": 1376 }, { "epoch": 153.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1384 }, { "epoch": 154.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1392 }, { "epoch": 155.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1400 }, { "epoch": 156.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1408 }, { "epoch": 156.43, "eval_exact_match": 0.6068162926018288, "eval_exec": 0.6525353283458022, "eval_loss": 0.386453777551651, "eval_runtime": 661.6138, "eval_samples_per_second": 2.456, "step": 1408 }, { "epoch": 157.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1416 }, { "epoch": 158.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1424 }, { "epoch": 159.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1432 }, { "epoch": 159.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1440 }, { "epoch": 160.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1448 }, { "epoch": 161.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1456 }, { "epoch": 162.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1464 }, { "epoch": 163.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1472 }, { "epoch": 163.54, "eval_exact_match": 0.6059850374064838, "eval_exec": 0.6533665835411472, "eval_loss": 0.36874809861183167, "eval_runtime": 690.7636, "eval_samples_per_second": 2.352, "step": 1472 }, { "epoch": 164.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1480 }, { "epoch": 165.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 1488 }, { "epoch": 166.21, "learning_rate": 0.0001, "loss": 0.0023, "step": 1496 }, { "epoch": 167.11, "learning_rate": 0.0001, "loss": 0.0027, "step": 1504 }, { "epoch": 167.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1512 }, { "epoch": 168.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1520 }, { "epoch": 169.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1528 }, { "epoch": 170.64, "learning_rate": 0.0001, "loss": 0.0009, "step": 1536 }, { "epoch": 170.64, "eval_exact_match": 0.6134663341645885, "eval_exec": 0.6591853699085619, "eval_loss": 0.3634226620197296, "eval_runtime": 651.3159, "eval_samples_per_second": 2.495, "step": 1536 }, { "epoch": 171.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1544 }, { "epoch": 172.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1552 }, { "epoch": 173.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1560 }, { "epoch": 174.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1568 }, { "epoch": 175.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1576 }, { "epoch": 175.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1584 }, { "epoch": 176.86, "learning_rate": 0.0001, "loss": 0.0016, "step": 1592 }, { "epoch": 177.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1600 }, { "epoch": 177.75, "eval_exact_match": 0.6084788029925187, "eval_exec": 0.6541978387364921, "eval_loss": 0.36969253420829773, "eval_runtime": 616.1266, "eval_samples_per_second": 2.637, "step": 1600 }, { "epoch": 178.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1608 }, { "epoch": 179.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1616 }, { "epoch": 180.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1624 }, { "epoch": 181.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1632 }, { "epoch": 182.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1640 }, { "epoch": 183.11, "learning_rate": 0.0001, "loss": 0.001, "step": 1648 }, { "epoch": 183.97, "learning_rate": 0.0001, "loss": 0.0035, "step": 1656 }, { "epoch": 184.86, "learning_rate": 0.0001, "loss": 0.0009, "step": 1664 }, { "epoch": 184.86, "eval_exact_match": 0.6109725685785536, "eval_exec": 0.6658354114713217, "eval_loss": 0.34515219926834106, "eval_runtime": 645.9688, "eval_samples_per_second": 2.516, "step": 1664 }, { "epoch": 185.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1672 }, { "epoch": 186.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1680 }, { "epoch": 187.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1688 }, { "epoch": 188.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1696 }, { "epoch": 189.32, "learning_rate": 0.0001, "loss": 0.0154, "step": 1704 }, { "epoch": 190.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1712 }, { "epoch": 191.11, "learning_rate": 0.0001, "loss": 0.0069, "step": 1720 }, { "epoch": 191.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1728 }, { "epoch": 191.97, "eval_exact_match": 0.6109725685785536, "eval_exec": 0.6616791354945969, "eval_loss": 0.35217443108558655, "eval_runtime": 638.0561, "eval_samples_per_second": 2.547, "step": 1728 }, { "epoch": 192.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1736 }, { "epoch": 193.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1744 }, { "epoch": 194.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1752 }, { "epoch": 195.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1760 }, { "epoch": 196.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1768 }, { "epoch": 197.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1776 }, { "epoch": 198.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1784 }, { "epoch": 199.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 1792 }, { "epoch": 199.11, "eval_exact_match": 0.6076475477971738, "eval_exec": 0.655860349127182, "eval_loss": 0.38836565613746643, "eval_runtime": 698.9686, "eval_samples_per_second": 2.325, "step": 1792 }, { "epoch": 199.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1800 }, { "epoch": 200.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1808 }, { "epoch": 201.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1816 }, { "epoch": 202.64, "learning_rate": 0.0001, "loss": 0.0098, "step": 1824 }, { "epoch": 203.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1832 }, { "epoch": 204.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1840 }, { "epoch": 205.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1848 }, { "epoch": 206.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 1856 }, { "epoch": 206.21, "eval_exact_match": 0.6192851205320034, "eval_exec": 0.6650041562759768, "eval_loss": 0.3801758289337158, "eval_runtime": 619.6713, "eval_samples_per_second": 2.622, "step": 1856 }, { "epoch": 207.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1864 }, { "epoch": 207.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1872 }, { "epoch": 208.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1880 }, { "epoch": 209.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1888 }, { "epoch": 210.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 1896 }, { "epoch": 211.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1904 }, { "epoch": 212.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1912 }, { "epoch": 213.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 1920 }, { "epoch": 213.32, "eval_exact_match": 0.6134663341645885, "eval_exec": 0.6633416458852868, "eval_loss": 0.39502447843551636, "eval_runtime": 693.1979, "eval_samples_per_second": 2.344, "step": 1920 }, { "epoch": 214.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 1928 }, { "epoch": 215.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1936 }, { "epoch": 215.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 1944 }, { "epoch": 216.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 1952 }, { "epoch": 217.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 1960 }, { "epoch": 218.64, "learning_rate": 0.0001, "loss": 0.0006, "step": 1968 }, { "epoch": 219.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1976 }, { "epoch": 220.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 1984 }, { "epoch": 220.43, "eval_exact_match": 0.6192851205320034, "eval_exec": 0.6600166251039069, "eval_loss": 0.3968105614185333, "eval_runtime": 631.3521, "eval_samples_per_second": 2.574, "step": 1984 }, { "epoch": 221.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 1992 }, { "epoch": 222.21, "learning_rate": 0.0001, "loss": 0.001, "step": 2000 }, { "epoch": 223.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 2008 }, { "epoch": 223.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2016 }, { "epoch": 224.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2024 }, { "epoch": 225.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 2032 }, { "epoch": 226.64, "learning_rate": 0.0001, "loss": 0.0012, "step": 2040 }, { "epoch": 227.54, "learning_rate": 0.0001, "loss": 0.0014, "step": 2048 }, { "epoch": 227.54, "eval_exact_match": 0.6084788029925187, "eval_exec": 0.6591853699085619, "eval_loss": 0.3648475110530853, "eval_runtime": 1018.7111, "eval_samples_per_second": 1.595, "step": 2048 }, { "epoch": 228.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2056 }, { "epoch": 229.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 2064 }, { "epoch": 230.21, "learning_rate": 0.0001, "loss": 0.001, "step": 2072 }, { "epoch": 231.11, "learning_rate": 0.0001, "loss": 0.001, "step": 2080 }, { "epoch": 231.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2088 }, { "epoch": 232.86, "learning_rate": 0.0001, "loss": 0.001, "step": 2096 }, { "epoch": 233.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2104 }, { "epoch": 234.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2112 }, { "epoch": 234.64, "eval_exact_match": 0.6126350789692435, "eval_exec": 0.6633416458852868, "eval_loss": 0.39520663022994995, "eval_runtime": 677.2313, "eval_samples_per_second": 2.399, "step": 2112 }, { "epoch": 235.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 2120 }, { "epoch": 236.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2128 }, { "epoch": 237.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 2136 }, { "epoch": 238.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2144 }, { "epoch": 239.11, "learning_rate": 0.0001, "loss": 0.0006, "step": 2152 }, { "epoch": 239.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2160 }, { "epoch": 240.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 2168 }, { "epoch": 241.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 2176 }, { "epoch": 241.75, "eval_exact_match": 0.6234413965087282, "eval_exec": 0.6683291770573566, "eval_loss": 0.40271905064582825, "eval_runtime": 654.7779, "eval_samples_per_second": 2.482, "step": 2176 }, { "epoch": 242.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2184 }, { "epoch": 243.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2192 }, { "epoch": 244.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2200 }, { "epoch": 245.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 2208 }, { "epoch": 246.21, "learning_rate": 0.0001, "loss": 0.0015, "step": 2216 }, { "epoch": 247.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 2224 }, { "epoch": 247.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2232 }, { "epoch": 248.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2240 }, { "epoch": 248.86, "eval_exact_match": 0.6184538653366584, "eval_exec": 0.6641729010806318, "eval_loss": 0.3742313086986542, "eval_runtime": 577.1527, "eval_samples_per_second": 2.816, "step": 2240 }, { "epoch": 249.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 2248 }, { "epoch": 250.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 2256 }, { "epoch": 251.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2264 }, { "epoch": 252.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2272 }, { "epoch": 253.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 2280 }, { "epoch": 254.21, "learning_rate": 0.0001, "loss": 0.0015, "step": 2288 }, { "epoch": 255.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 2296 }, { "epoch": 255.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 2304 }, { "epoch": 255.97, "eval_exact_match": 0.6109725685785536, "eval_exec": 0.6450540315876975, "eval_loss": 0.3888351619243622, "eval_runtime": 585.9503, "eval_samples_per_second": 2.773, "step": 2304 }, { "epoch": 256.86, "learning_rate": 0.0001, "loss": 0.0038, "step": 2312 }, { "epoch": 257.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 2320 }, { "epoch": 258.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 2328 }, { "epoch": 259.54, "learning_rate": 0.0001, "loss": 0.001, "step": 2336 }, { "epoch": 260.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 2344 }, { "epoch": 261.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 2352 }, { "epoch": 262.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2360 }, { "epoch": 263.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 2368 }, { "epoch": 263.11, "eval_exact_match": 0.6300914380714879, "eval_exec": 0.6741479634247715, "eval_loss": 0.38216930627822876, "eval_runtime": 619.0871, "eval_samples_per_second": 2.625, "step": 2368 }, { "epoch": 263.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2376 }, { "epoch": 264.86, "learning_rate": 0.0001, "loss": 0.0006, "step": 2384 }, { "epoch": 265.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2392 }, { "epoch": 266.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 2400 }, { "epoch": 267.54, "learning_rate": 0.0001, "loss": 0.009, "step": 2408 }, { "epoch": 268.43, "learning_rate": 0.0001, "loss": 0.0006, "step": 2416 }, { "epoch": 269.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 2424 }, { "epoch": 270.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 2432 }, { "epoch": 270.21, "eval_exact_match": 0.6417290108063175, "eval_exec": 0.6816292601828762, "eval_loss": 0.385798841714859, "eval_runtime": 1200.0927, "eval_samples_per_second": 1.354, "step": 2432 } ], "max_steps": 27648, "num_train_epochs": 3072, "total_flos": 1.8557441383975617e+19, "trial_name": null, "trial_params": null }