{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "eval_steps": 500, "global_step": 1725, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.780346820809248e-07, "loss": 0.005, "step": 1 }, { "epoch": 0.13, "learning_rate": 5.202312138728324e-06, "loss": 0.0008, "step": 9 }, { "epoch": 0.26, "learning_rate": 1.0404624277456647e-05, "loss": 0.0019, "step": 18 }, { "epoch": 0.39, "learning_rate": 1.5606936416184973e-05, "loss": 0.0019, "step": 27 }, { "epoch": 0.52, "learning_rate": 2.0809248554913295e-05, "loss": 0.0016, "step": 36 }, { "epoch": 0.65, "learning_rate": 2.6011560693641617e-05, "loss": 0.0027, "step": 45 }, { "epoch": 0.78, "learning_rate": 3.1213872832369946e-05, "loss": 0.0032, "step": 54 }, { "epoch": 0.91, "learning_rate": 3.6416184971098265e-05, "loss": 0.0013, "step": 63 }, { "epoch": 1.0, "eval_loss": 0.002811400219798088, "eval_max_distance": 2, "eval_mean_distance": 0, "eval_runtime": 1.9415, "eval_samples_per_second": 42.235, "eval_steps_per_second": 1.545, "step": 69 }, { "epoch": 1.04, "learning_rate": 4.161849710982659e-05, "loss": 0.0033, "step": 72 }, { "epoch": 1.17, "learning_rate": 4.6820809248554915e-05, "loss": 0.0034, "step": 81 }, { "epoch": 1.3, "learning_rate": 5.2023121387283234e-05, "loss": 0.0015, "step": 90 }, { "epoch": 1.43, "learning_rate": 5.722543352601156e-05, "loss": 0.0042, "step": 99 }, { "epoch": 1.57, "learning_rate": 6.242774566473989e-05, "loss": 0.0025, "step": 108 }, { "epoch": 1.7, "learning_rate": 6.763005780346822e-05, "loss": 0.0012, "step": 117 }, { "epoch": 1.83, "learning_rate": 7.283236994219653e-05, "loss": 0.002, "step": 126 }, { "epoch": 1.96, "learning_rate": 7.803468208092485e-05, "loss": 0.0006, "step": 135 }, { "epoch": 2.0, "eval_loss": 0.0026117784436792135, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9167, "eval_samples_per_second": 42.781, "eval_steps_per_second": 1.565, "step": 138 }, { "epoch": 2.09, "learning_rate": 8.323699421965318e-05, "loss": 0.001, "step": 144 }, { "epoch": 2.22, "learning_rate": 8.84393063583815e-05, "loss": 0.0016, "step": 153 }, { "epoch": 2.35, "learning_rate": 9.364161849710983e-05, "loss": 0.0011, "step": 162 }, { "epoch": 2.48, "learning_rate": 9.884393063583816e-05, "loss": 0.001, "step": 171 }, { "epoch": 2.61, "learning_rate": 9.954896907216495e-05, "loss": 0.0026, "step": 180 }, { "epoch": 2.74, "learning_rate": 9.896907216494846e-05, "loss": 0.0008, "step": 189 }, { "epoch": 2.87, "learning_rate": 9.838917525773196e-05, "loss": 0.0022, "step": 198 }, { "epoch": 3.0, "learning_rate": 9.780927835051546e-05, "loss": 0.0025, "step": 207 }, { "epoch": 3.0, "eval_loss": 0.003930480219423771, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9341, "eval_samples_per_second": 42.396, "eval_steps_per_second": 1.551, "step": 207 }, { "epoch": 3.13, "learning_rate": 9.722938144329897e-05, "loss": 0.0019, "step": 216 }, { "epoch": 3.26, "learning_rate": 9.664948453608248e-05, "loss": 0.0024, "step": 225 }, { "epoch": 3.39, "learning_rate": 9.606958762886598e-05, "loss": 0.0007, "step": 234 }, { "epoch": 3.52, "learning_rate": 9.54896907216495e-05, "loss": 0.0026, "step": 243 }, { "epoch": 3.65, "learning_rate": 9.490979381443299e-05, "loss": 0.0009, "step": 252 }, { "epoch": 3.78, "learning_rate": 9.43298969072165e-05, "loss": 0.0012, "step": 261 }, { "epoch": 3.91, "learning_rate": 9.375e-05, "loss": 0.0004, "step": 270 }, { "epoch": 4.0, "eval_loss": 0.0036886015441268682, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9271, "eval_samples_per_second": 42.551, "eval_steps_per_second": 1.557, "step": 276 }, { "epoch": 4.04, "learning_rate": 9.317010309278351e-05, "loss": 0.0018, "step": 279 }, { "epoch": 4.17, "learning_rate": 9.259020618556701e-05, "loss": 0.0017, "step": 288 }, { "epoch": 4.3, "learning_rate": 9.201030927835051e-05, "loss": 0.0014, "step": 297 }, { "epoch": 4.43, "learning_rate": 9.143041237113402e-05, "loss": 0.0005, "step": 306 }, { "epoch": 4.57, "learning_rate": 9.085051546391753e-05, "loss": 0.001, "step": 315 }, { "epoch": 4.7, "learning_rate": 9.027061855670103e-05, "loss": 0.002, "step": 324 }, { "epoch": 4.83, "learning_rate": 8.969072164948454e-05, "loss": 0.0016, "step": 333 }, { "epoch": 4.96, "learning_rate": 8.911082474226806e-05, "loss": 0.0005, "step": 342 }, { "epoch": 5.0, "eval_loss": 0.009117466397583485, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9694, "eval_samples_per_second": 41.638, "eval_steps_per_second": 1.523, "step": 345 }, { "epoch": 5.09, "learning_rate": 8.853092783505154e-05, "loss": 0.0005, "step": 351 }, { "epoch": 5.22, "learning_rate": 8.795103092783505e-05, "loss": 0.0051, "step": 360 }, { "epoch": 5.35, "learning_rate": 8.737113402061856e-05, "loss": 0.0004, "step": 369 }, { "epoch": 5.48, "learning_rate": 8.679123711340206e-05, "loss": 0.0012, "step": 378 }, { "epoch": 5.61, "learning_rate": 8.621134020618558e-05, "loss": 0.001, "step": 387 }, { "epoch": 5.74, "learning_rate": 8.563144329896907e-05, "loss": 0.0015, "step": 396 }, { "epoch": 5.87, "learning_rate": 8.505154639175259e-05, "loss": 0.0016, "step": 405 }, { "epoch": 6.0, "learning_rate": 8.447164948453608e-05, "loss": 0.0009, "step": 414 }, { "epoch": 6.0, "eval_loss": 0.0005720060435123742, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9399, "eval_samples_per_second": 42.271, "eval_steps_per_second": 1.546, "step": 414 }, { "epoch": 6.13, "learning_rate": 8.38917525773196e-05, "loss": 0.0005, "step": 423 }, { "epoch": 6.26, "learning_rate": 8.331185567010311e-05, "loss": 0.0006, "step": 432 }, { "epoch": 6.39, "learning_rate": 8.273195876288659e-05, "loss": 0.0014, "step": 441 }, { "epoch": 6.52, "learning_rate": 8.21520618556701e-05, "loss": 0.0019, "step": 450 }, { "epoch": 6.65, "learning_rate": 8.157216494845362e-05, "loss": 0.001, "step": 459 }, { "epoch": 6.78, "learning_rate": 8.099226804123711e-05, "loss": 0.0008, "step": 468 }, { "epoch": 6.91, "learning_rate": 8.041237113402063e-05, "loss": 0.0016, "step": 477 }, { "epoch": 7.0, "eval_loss": 0.00027213190332986414, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9188, "eval_samples_per_second": 42.735, "eval_steps_per_second": 1.563, "step": 483 }, { "epoch": 7.04, "learning_rate": 7.983247422680414e-05, "loss": 0.0006, "step": 486 }, { "epoch": 7.17, "learning_rate": 7.925257731958762e-05, "loss": 0.007, "step": 495 }, { "epoch": 7.3, "learning_rate": 7.867268041237113e-05, "loss": 0.0004, "step": 504 }, { "epoch": 7.43, "learning_rate": 7.809278350515465e-05, "loss": 0.0016, "step": 513 }, { "epoch": 7.57, "learning_rate": 7.751288659793814e-05, "loss": 0.0006, "step": 522 }, { "epoch": 7.7, "learning_rate": 7.693298969072166e-05, "loss": 0.0011, "step": 531 }, { "epoch": 7.83, "learning_rate": 7.635309278350515e-05, "loss": 0.0014, "step": 540 }, { "epoch": 7.96, "learning_rate": 7.577319587628867e-05, "loss": 0.0012, "step": 549 }, { "epoch": 8.0, "eval_loss": 0.011139851063489914, "eval_max_distance": 5, "eval_mean_distance": 0, "eval_runtime": 1.9435, "eval_samples_per_second": 42.193, "eval_steps_per_second": 1.544, "step": 552 }, { "epoch": 8.09, "learning_rate": 7.519329896907217e-05, "loss": 0.0008, "step": 558 }, { "epoch": 8.22, "learning_rate": 7.461340206185568e-05, "loss": 0.0011, "step": 567 }, { "epoch": 8.35, "learning_rate": 7.403350515463919e-05, "loss": 0.0025, "step": 576 }, { "epoch": 8.48, "learning_rate": 7.345360824742269e-05, "loss": 0.003, "step": 585 }, { "epoch": 8.61, "learning_rate": 7.287371134020619e-05, "loss": 0.004, "step": 594 }, { "epoch": 8.74, "learning_rate": 7.22938144329897e-05, "loss": 0.002, "step": 603 }, { "epoch": 8.87, "learning_rate": 7.17139175257732e-05, "loss": 0.0007, "step": 612 }, { "epoch": 9.0, "learning_rate": 7.113402061855671e-05, "loss": 0.0008, "step": 621 }, { "epoch": 9.0, "eval_loss": 0.0003953798732254654, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.995, "eval_samples_per_second": 41.102, "eval_steps_per_second": 1.504, "step": 621 }, { "epoch": 9.13, "learning_rate": 7.055412371134022e-05, "loss": 0.0005, "step": 630 }, { "epoch": 9.26, "learning_rate": 6.99742268041237e-05, "loss": 0.0004, "step": 639 }, { "epoch": 9.39, "learning_rate": 6.939432989690722e-05, "loss": 0.0013, "step": 648 }, { "epoch": 9.52, "learning_rate": 6.881443298969073e-05, "loss": 0.0002, "step": 657 }, { "epoch": 9.65, "learning_rate": 6.823453608247423e-05, "loss": 0.0011, "step": 666 }, { "epoch": 9.78, "learning_rate": 6.765463917525774e-05, "loss": 0.0018, "step": 675 }, { "epoch": 9.91, "learning_rate": 6.707474226804124e-05, "loss": 0.0018, "step": 684 }, { "epoch": 10.0, "eval_loss": 0.00027754431357607245, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9222, "eval_samples_per_second": 42.659, "eval_steps_per_second": 1.561, "step": 690 }, { "epoch": 10.04, "learning_rate": 6.649484536082475e-05, "loss": 0.0011, "step": 693 }, { "epoch": 10.17, "learning_rate": 6.591494845360825e-05, "loss": 0.0006, "step": 702 }, { "epoch": 10.3, "learning_rate": 6.533505154639176e-05, "loss": 0.0011, "step": 711 }, { "epoch": 10.43, "learning_rate": 6.475515463917527e-05, "loss": 0.0013, "step": 720 }, { "epoch": 10.57, "learning_rate": 6.417525773195877e-05, "loss": 0.0006, "step": 729 }, { "epoch": 10.7, "learning_rate": 6.359536082474227e-05, "loss": 0.0018, "step": 738 }, { "epoch": 10.83, "learning_rate": 6.301546391752578e-05, "loss": 0.0016, "step": 747 }, { "epoch": 10.96, "learning_rate": 6.243556701030928e-05, "loss": 0.0028, "step": 756 }, { "epoch": 11.0, "eval_loss": 0.00033258015173487365, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9385, "eval_samples_per_second": 42.301, "eval_steps_per_second": 1.548, "step": 759 }, { "epoch": 11.09, "learning_rate": 6.185567010309279e-05, "loss": 0.0009, "step": 765 }, { "epoch": 11.22, "learning_rate": 6.12757731958763e-05, "loss": 0.0005, "step": 774 }, { "epoch": 11.35, "learning_rate": 6.069587628865979e-05, "loss": 0.0011, "step": 783 }, { "epoch": 11.48, "learning_rate": 6.01159793814433e-05, "loss": 0.0007, "step": 792 }, { "epoch": 11.61, "learning_rate": 5.953608247422681e-05, "loss": 0.0012, "step": 801 }, { "epoch": 11.74, "learning_rate": 5.8956185567010315e-05, "loss": 0.0021, "step": 810 }, { "epoch": 11.87, "learning_rate": 5.837628865979382e-05, "loss": 0.0006, "step": 819 }, { "epoch": 12.0, "learning_rate": 5.779639175257732e-05, "loss": 0.0008, "step": 828 }, { "epoch": 12.0, "eval_loss": 0.0002690624096430838, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9448, "eval_samples_per_second": 42.163, "eval_steps_per_second": 1.543, "step": 828 }, { "epoch": 12.13, "learning_rate": 5.721649484536082e-05, "loss": 0.001, "step": 837 }, { "epoch": 12.26, "learning_rate": 5.663659793814433e-05, "loss": 0.0012, "step": 846 }, { "epoch": 12.39, "learning_rate": 5.605670103092784e-05, "loss": 0.0005, "step": 855 }, { "epoch": 12.52, "learning_rate": 5.5476804123711345e-05, "loss": 0.0011, "step": 864 }, { "epoch": 12.65, "learning_rate": 5.489690721649485e-05, "loss": 0.0026, "step": 873 }, { "epoch": 12.78, "learning_rate": 5.431701030927835e-05, "loss": 0.0009, "step": 882 }, { "epoch": 12.91, "learning_rate": 5.3737113402061854e-05, "loss": 0.001, "step": 891 }, { "epoch": 13.0, "eval_loss": 0.0004277784610167146, "eval_max_distance": 2, "eval_mean_distance": 0, "eval_runtime": 1.9315, "eval_samples_per_second": 42.454, "eval_steps_per_second": 1.553, "step": 897 }, { "epoch": 13.04, "learning_rate": 5.3157216494845366e-05, "loss": 0.0007, "step": 900 }, { "epoch": 13.17, "learning_rate": 5.257731958762887e-05, "loss": 0.0008, "step": 909 }, { "epoch": 13.3, "learning_rate": 5.1997422680412376e-05, "loss": 0.0005, "step": 918 }, { "epoch": 13.43, "learning_rate": 5.1417525773195874e-05, "loss": 0.0004, "step": 927 }, { "epoch": 13.57, "learning_rate": 5.083762886597938e-05, "loss": 0.0006, "step": 936 }, { "epoch": 13.7, "learning_rate": 5.025773195876289e-05, "loss": 0.0025, "step": 945 }, { "epoch": 13.83, "learning_rate": 4.9677835051546396e-05, "loss": 0.0009, "step": 954 }, { "epoch": 13.96, "learning_rate": 4.9097938144329895e-05, "loss": 0.0026, "step": 963 }, { "epoch": 14.0, "eval_loss": 0.0005385838449001312, "eval_max_distance": 2, "eval_mean_distance": 0, "eval_runtime": 1.993, "eval_samples_per_second": 41.144, "eval_steps_per_second": 1.505, "step": 966 }, { "epoch": 14.09, "learning_rate": 4.8518041237113407e-05, "loss": 0.0016, "step": 972 }, { "epoch": 14.22, "learning_rate": 4.793814432989691e-05, "loss": 0.0014, "step": 981 }, { "epoch": 14.35, "learning_rate": 4.735824742268041e-05, "loss": 0.0007, "step": 990 }, { "epoch": 14.48, "learning_rate": 4.677835051546392e-05, "loss": 0.0031, "step": 999 }, { "epoch": 14.61, "learning_rate": 4.619845360824743e-05, "loss": 0.0008, "step": 1008 }, { "epoch": 14.74, "learning_rate": 4.561855670103093e-05, "loss": 0.0028, "step": 1017 }, { "epoch": 14.87, "learning_rate": 4.503865979381444e-05, "loss": 0.0004, "step": 1026 }, { "epoch": 15.0, "learning_rate": 4.4458762886597936e-05, "loss": 0.0015, "step": 1035 }, { "epoch": 15.0, "eval_loss": 0.0007138837827369571, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9688, "eval_samples_per_second": 41.651, "eval_steps_per_second": 1.524, "step": 1035 }, { "epoch": 15.13, "learning_rate": 4.387886597938145e-05, "loss": 0.0005, "step": 1044 }, { "epoch": 15.26, "learning_rate": 4.329896907216495e-05, "loss": 0.0011, "step": 1053 }, { "epoch": 15.39, "learning_rate": 4.271907216494845e-05, "loss": 0.0001, "step": 1062 }, { "epoch": 15.52, "learning_rate": 4.213917525773196e-05, "loss": 0.0009, "step": 1071 }, { "epoch": 15.65, "learning_rate": 4.155927835051547e-05, "loss": 0.0017, "step": 1080 }, { "epoch": 15.78, "learning_rate": 4.097938144329897e-05, "loss": 0.0013, "step": 1089 }, { "epoch": 15.91, "learning_rate": 4.039948453608248e-05, "loss": 0.0009, "step": 1098 }, { "epoch": 16.0, "eval_loss": 0.0006717974320054054, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9244, "eval_samples_per_second": 42.612, "eval_steps_per_second": 1.559, "step": 1104 }, { "epoch": 16.04, "learning_rate": 3.9819587628865976e-05, "loss": 0.0015, "step": 1107 }, { "epoch": 16.17, "learning_rate": 3.923969072164949e-05, "loss": 0.0003, "step": 1116 }, { "epoch": 16.3, "learning_rate": 3.865979381443299e-05, "loss": 0.0006, "step": 1125 }, { "epoch": 16.43, "learning_rate": 3.807989690721649e-05, "loss": 0.0013, "step": 1134 }, { "epoch": 16.57, "learning_rate": 3.7500000000000003e-05, "loss": 0.0009, "step": 1143 }, { "epoch": 16.7, "learning_rate": 3.692010309278351e-05, "loss": 0.0004, "step": 1152 }, { "epoch": 16.83, "learning_rate": 3.6340206185567014e-05, "loss": 0.0008, "step": 1161 }, { "epoch": 16.96, "learning_rate": 3.576030927835052e-05, "loss": 0.0014, "step": 1170 }, { "epoch": 17.0, "eval_loss": 0.00033988503855653107, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9136, "eval_samples_per_second": 42.851, "eval_steps_per_second": 1.568, "step": 1173 }, { "epoch": 17.09, "learning_rate": 3.5180412371134024e-05, "loss": 0.0016, "step": 1179 }, { "epoch": 17.22, "learning_rate": 3.460051546391753e-05, "loss": 0.0004, "step": 1188 }, { "epoch": 17.35, "learning_rate": 3.4020618556701034e-05, "loss": 0.002, "step": 1197 }, { "epoch": 17.48, "learning_rate": 3.344072164948453e-05, "loss": 0.0012, "step": 1206 }, { "epoch": 17.61, "learning_rate": 3.2860824742268044e-05, "loss": 0.0002, "step": 1215 }, { "epoch": 17.74, "learning_rate": 3.228092783505155e-05, "loss": 0.0006, "step": 1224 }, { "epoch": 17.87, "learning_rate": 3.1701030927835054e-05, "loss": 0.0044, "step": 1233 }, { "epoch": 18.0, "learning_rate": 3.112113402061856e-05, "loss": 0.001, "step": 1242 }, { "epoch": 18.0, "eval_loss": 0.00037691937177442014, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9496, "eval_samples_per_second": 42.059, "eval_steps_per_second": 1.539, "step": 1242 }, { "epoch": 18.13, "learning_rate": 3.0541237113402065e-05, "loss": 0.0005, "step": 1251 }, { "epoch": 18.26, "learning_rate": 2.9961340206185566e-05, "loss": 0.0009, "step": 1260 }, { "epoch": 18.39, "learning_rate": 2.9381443298969075e-05, "loss": 0.0008, "step": 1269 }, { "epoch": 18.52, "learning_rate": 2.8801546391752577e-05, "loss": 0.0007, "step": 1278 }, { "epoch": 18.65, "learning_rate": 2.8221649484536085e-05, "loss": 0.0016, "step": 1287 }, { "epoch": 18.78, "learning_rate": 2.764175257731959e-05, "loss": 0.0012, "step": 1296 }, { "epoch": 18.91, "learning_rate": 2.7061855670103092e-05, "loss": 0.0007, "step": 1305 }, { "epoch": 19.0, "eval_loss": 0.001327142701484263, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9144, "eval_samples_per_second": 42.834, "eval_steps_per_second": 1.567, "step": 1311 }, { "epoch": 19.04, "learning_rate": 2.64819587628866e-05, "loss": 0.0005, "step": 1314 }, { "epoch": 19.17, "learning_rate": 2.5902061855670106e-05, "loss": 0.0011, "step": 1323 }, { "epoch": 19.3, "learning_rate": 2.5322164948453607e-05, "loss": 0.0006, "step": 1332 }, { "epoch": 19.43, "learning_rate": 2.4742268041237116e-05, "loss": 0.0004, "step": 1341 }, { "epoch": 19.57, "learning_rate": 2.416237113402062e-05, "loss": 0.002, "step": 1350 }, { "epoch": 19.7, "learning_rate": 2.3582474226804126e-05, "loss": 0.0003, "step": 1359 }, { "epoch": 19.83, "learning_rate": 2.3002577319587628e-05, "loss": 0.0011, "step": 1368 }, { "epoch": 19.96, "learning_rate": 2.2422680412371136e-05, "loss": 0.0013, "step": 1377 }, { "epoch": 20.0, "eval_loss": 0.0012958323350176215, "eval_max_distance": 3, "eval_mean_distance": 0, "eval_runtime": 1.9255, "eval_samples_per_second": 42.587, "eval_steps_per_second": 1.558, "step": 1380 }, { "epoch": 20.09, "learning_rate": 2.184278350515464e-05, "loss": 0.0017, "step": 1386 }, { "epoch": 20.22, "learning_rate": 2.1262886597938146e-05, "loss": 0.0011, "step": 1395 }, { "epoch": 20.35, "learning_rate": 2.0682989690721648e-05, "loss": 0.0014, "step": 1404 }, { "epoch": 20.48, "learning_rate": 2.0103092783505157e-05, "loss": 0.0006, "step": 1413 }, { "epoch": 20.61, "learning_rate": 1.952319587628866e-05, "loss": 0.0003, "step": 1422 }, { "epoch": 20.74, "learning_rate": 1.8943298969072167e-05, "loss": 0.0002, "step": 1431 }, { "epoch": 20.87, "learning_rate": 1.8363402061855672e-05, "loss": 0.0024, "step": 1440 }, { "epoch": 21.0, "learning_rate": 1.7783505154639177e-05, "loss": 0.0007, "step": 1449 }, { "epoch": 21.0, "eval_loss": 0.0002567500632721931, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9342, "eval_samples_per_second": 42.395, "eval_steps_per_second": 1.551, "step": 1449 }, { "epoch": 21.13, "learning_rate": 1.7203608247422682e-05, "loss": 0.0006, "step": 1458 }, { "epoch": 21.26, "learning_rate": 1.6623711340206187e-05, "loss": 0.002, "step": 1467 }, { "epoch": 21.39, "learning_rate": 1.6043814432989692e-05, "loss": 0.0007, "step": 1476 }, { "epoch": 21.52, "learning_rate": 1.5463917525773197e-05, "loss": 0.0007, "step": 1485 }, { "epoch": 21.65, "learning_rate": 1.4884020618556702e-05, "loss": 0.0016, "step": 1494 }, { "epoch": 21.78, "learning_rate": 1.4304123711340206e-05, "loss": 0.0008, "step": 1503 }, { "epoch": 21.91, "learning_rate": 1.3724226804123713e-05, "loss": 0.0016, "step": 1512 }, { "epoch": 22.0, "eval_loss": 0.0002821955131366849, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9113, "eval_samples_per_second": 42.902, "eval_steps_per_second": 1.57, "step": 1518 }, { "epoch": 22.04, "learning_rate": 1.3144329896907218e-05, "loss": 0.0018, "step": 1521 }, { "epoch": 22.17, "learning_rate": 1.2564432989690723e-05, "loss": 0.0027, "step": 1530 }, { "epoch": 22.3, "learning_rate": 1.1984536082474228e-05, "loss": 0.0002, "step": 1539 }, { "epoch": 22.43, "learning_rate": 1.1404639175257733e-05, "loss": 0.001, "step": 1548 }, { "epoch": 22.57, "learning_rate": 1.0824742268041238e-05, "loss": 0.0007, "step": 1557 }, { "epoch": 22.7, "learning_rate": 1.0244845360824743e-05, "loss": 0.0009, "step": 1566 }, { "epoch": 22.83, "learning_rate": 9.664948453608248e-06, "loss": 0.0012, "step": 1575 }, { "epoch": 22.96, "learning_rate": 9.085051546391753e-06, "loss": 0.0013, "step": 1584 }, { "epoch": 23.0, "eval_loss": 0.00030223012436181307, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9675, "eval_samples_per_second": 41.677, "eval_steps_per_second": 1.525, "step": 1587 }, { "epoch": 23.09, "learning_rate": 8.505154639175259e-06, "loss": 0.0025, "step": 1593 }, { "epoch": 23.22, "learning_rate": 7.925257731958764e-06, "loss": 0.001, "step": 1602 }, { "epoch": 23.35, "learning_rate": 7.345360824742269e-06, "loss": 0.0004, "step": 1611 }, { "epoch": 23.48, "learning_rate": 6.765463917525773e-06, "loss": 0.0006, "step": 1620 }, { "epoch": 23.61, "learning_rate": 6.185567010309279e-06, "loss": 0.001, "step": 1629 }, { "epoch": 23.74, "learning_rate": 5.605670103092784e-06, "loss": 0.0012, "step": 1638 }, { "epoch": 23.87, "learning_rate": 5.025773195876289e-06, "loss": 0.0013, "step": 1647 }, { "epoch": 24.0, "learning_rate": 4.445876288659794e-06, "loss": 0.0004, "step": 1656 }, { "epoch": 24.0, "eval_loss": 0.0002631743554957211, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 1.9278, "eval_samples_per_second": 42.536, "eval_steps_per_second": 1.556, "step": 1656 }, { "epoch": 24.13, "learning_rate": 3.865979381443299e-06, "loss": 0.0018, "step": 1665 }, { "epoch": 24.26, "learning_rate": 3.2860824742268044e-06, "loss": 0.0006, "step": 1674 }, { "epoch": 24.39, "learning_rate": 2.7061855670103095e-06, "loss": 0.0012, "step": 1683 }, { "epoch": 24.52, "learning_rate": 2.1262886597938146e-06, "loss": 0.0009, "step": 1692 }, { "epoch": 24.65, "learning_rate": 1.5463917525773197e-06, "loss": 0.0007, "step": 1701 }, { "epoch": 24.78, "learning_rate": 9.664948453608248e-07, "loss": 0.0009, "step": 1710 }, { "epoch": 24.91, "learning_rate": 3.8659793814432993e-07, "loss": 0.001, "step": 1719 }, { "epoch": 25.0, "eval_loss": 0.0002593309909570962, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.0449, "eval_samples_per_second": 40.1, "eval_steps_per_second": 1.467, "step": 1725 }, { "epoch": 25.0, "step": 1725, "total_flos": 459342194208768.0, "train_loss": 0.0013090899151048043, "train_runtime": 199.817, "train_samples_per_second": 256.61, "train_steps_per_second": 8.633 } ], "logging_steps": 9, "max_steps": 1725, "num_train_epochs": 25, "save_steps": 18, "total_flos": 459342194208768.0, "trial_name": null, "trial_params": null }