{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 49400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.51, "learning_rate": 9.898785425101214e-06, "loss": 4.6519, "step": 500 }, { "epoch": 1.0, "eval_loss": 2.3328564167022705, "eval_mae": 1.2115871906280518, "eval_mse": 2.3328561782836914, "eval_rmse": 1.5273690223693848, "eval_runtime": 2.139, "eval_samples_per_second": 460.504, "eval_steps_per_second": 57.972, "step": 988 }, { "epoch": 1.01, "learning_rate": 9.79757085020243e-06, "loss": 1.9813, "step": 1000 }, { "epoch": 1.52, "learning_rate": 9.696356275303645e-06, "loss": 1.7364, "step": 1500 }, { "epoch": 2.0, "eval_loss": 2.25815749168396, "eval_mae": 1.1845210790634155, "eval_mse": 2.25815749168396, "eval_rmse": 1.5027166604995728, "eval_runtime": 2.2991, "eval_samples_per_second": 428.436, "eval_steps_per_second": 53.935, "step": 1976 }, { "epoch": 2.02, "learning_rate": 9.595141700404859e-06, "loss": 1.6471, "step": 2000 }, { "epoch": 2.53, "learning_rate": 9.493927125506074e-06, "loss": 1.4318, "step": 2500 }, { "epoch": 3.0, "eval_loss": 2.2711498737335205, "eval_mae": 1.195154070854187, "eval_mse": 2.2711498737335205, "eval_rmse": 1.5070334672927856, "eval_runtime": 1.9177, "eval_samples_per_second": 513.633, "eval_steps_per_second": 64.66, "step": 2964 }, { "epoch": 3.04, "learning_rate": 9.392712550607288e-06, "loss": 1.3929, "step": 3000 }, { "epoch": 3.54, "learning_rate": 9.291497975708503e-06, "loss": 1.2099, "step": 3500 }, { "epoch": 4.0, "eval_loss": 2.5615780353546143, "eval_mae": 1.2618587017059326, "eval_mse": 2.5615780353546143, "eval_rmse": 1.60049307346344, "eval_runtime": 2.4694, "eval_samples_per_second": 398.879, "eval_steps_per_second": 50.214, "step": 3952 }, { "epoch": 4.05, "learning_rate": 9.190283400809717e-06, "loss": 1.1585, "step": 4000 }, { "epoch": 4.55, "learning_rate": 9.089068825910932e-06, "loss": 0.9984, "step": 4500 }, { "epoch": 5.0, "eval_loss": 2.386498212814331, "eval_mae": 1.2156014442443848, "eval_mse": 2.386498212814331, "eval_rmse": 1.5448294878005981, "eval_runtime": 2.1201, "eval_samples_per_second": 464.611, "eval_steps_per_second": 58.489, "step": 4940 }, { "epoch": 5.06, "learning_rate": 8.987854251012147e-06, "loss": 1.0114, "step": 5000 }, { "epoch": 5.57, "learning_rate": 8.886639676113361e-06, "loss": 0.897, "step": 5500 }, { "epoch": 6.0, "eval_loss": 2.3048605918884277, "eval_mae": 1.186144471168518, "eval_mse": 2.3048605918884277, "eval_rmse": 1.5181766748428345, "eval_runtime": 2.4807, "eval_samples_per_second": 397.063, "eval_steps_per_second": 49.986, "step": 5928 }, { "epoch": 6.07, "learning_rate": 8.785425101214575e-06, "loss": 0.912, "step": 6000 }, { "epoch": 6.58, "learning_rate": 8.68421052631579e-06, "loss": 0.7973, "step": 6500 }, { "epoch": 7.0, "eval_loss": 2.423027992248535, "eval_mae": 1.223141074180603, "eval_mse": 2.4230284690856934, "eval_rmse": 1.556607961654663, "eval_runtime": 2.3801, "eval_samples_per_second": 413.851, "eval_steps_per_second": 52.099, "step": 6916 }, { "epoch": 7.09, "learning_rate": 8.582995951417005e-06, "loss": 0.8141, "step": 7000 }, { "epoch": 7.59, "learning_rate": 8.481781376518219e-06, "loss": 0.7115, "step": 7500 }, { "epoch": 8.0, "eval_loss": 2.51936936378479, "eval_mae": 1.2446390390396118, "eval_mse": 2.51936936378479, "eval_rmse": 1.587252140045166, "eval_runtime": 2.2497, "eval_samples_per_second": 437.844, "eval_steps_per_second": 55.119, "step": 7904 }, { "epoch": 8.1, "learning_rate": 8.380566801619434e-06, "loss": 0.7455, "step": 8000 }, { "epoch": 8.6, "learning_rate": 8.279352226720648e-06, "loss": 0.6548, "step": 8500 }, { "epoch": 9.0, "eval_loss": 2.3894152641296387, "eval_mae": 1.20915687084198, "eval_mse": 2.3894152641296387, "eval_rmse": 1.5457733869552612, "eval_runtime": 2.012, "eval_samples_per_second": 489.572, "eval_steps_per_second": 61.631, "step": 8892 }, { "epoch": 9.11, "learning_rate": 8.178137651821862e-06, "loss": 0.6625, "step": 9000 }, { "epoch": 9.62, "learning_rate": 8.076923076923077e-06, "loss": 0.6466, "step": 9500 }, { "epoch": 10.0, "eval_loss": 2.3797826766967773, "eval_mae": 1.209175944328308, "eval_mse": 2.3797826766967773, "eval_rmse": 1.5426543951034546, "eval_runtime": 1.9282, "eval_samples_per_second": 510.852, "eval_steps_per_second": 64.31, "step": 9880 }, { "epoch": 10.12, "learning_rate": 7.975708502024292e-06, "loss": 0.6429, "step": 10000 }, { "epoch": 10.63, "learning_rate": 7.874493927125508e-06, "loss": 0.6218, "step": 10500 }, { "epoch": 11.0, "eval_loss": 2.325528383255005, "eval_mae": 1.197866678237915, "eval_mse": 2.325528383255005, "eval_rmse": 1.5249682664871216, "eval_runtime": 2.2819, "eval_samples_per_second": 431.658, "eval_steps_per_second": 54.341, "step": 10868 }, { "epoch": 11.13, "learning_rate": 7.773279352226721e-06, "loss": 0.596, "step": 11000 }, { "epoch": 11.64, "learning_rate": 7.672064777327935e-06, "loss": 0.5498, "step": 11500 }, { "epoch": 12.0, "eval_loss": 2.391416072845459, "eval_mae": 1.2055319547653198, "eval_mse": 2.391416072845459, "eval_rmse": 1.5464204549789429, "eval_runtime": 2.3484, "eval_samples_per_second": 419.443, "eval_steps_per_second": 52.803, "step": 11856 }, { "epoch": 12.15, "learning_rate": 7.570850202429151e-06, "loss": 0.5617, "step": 12000 }, { "epoch": 12.65, "learning_rate": 7.469635627530365e-06, "loss": 0.5403, "step": 12500 }, { "epoch": 13.0, "eval_loss": 2.4148483276367188, "eval_mae": 1.2168691158294678, "eval_mse": 2.4148483276367188, "eval_rmse": 1.5539782047271729, "eval_runtime": 2.3371, "eval_samples_per_second": 421.463, "eval_steps_per_second": 53.057, "step": 12844 }, { "epoch": 13.16, "learning_rate": 7.368421052631579e-06, "loss": 0.5424, "step": 13000 }, { "epoch": 13.66, "learning_rate": 7.267206477732795e-06, "loss": 0.5392, "step": 13500 }, { "epoch": 14.0, "eval_loss": 2.429582357406616, "eval_mae": 1.2232253551483154, "eval_mse": 2.429582357406616, "eval_rmse": 1.5587117671966553, "eval_runtime": 1.971, "eval_samples_per_second": 499.752, "eval_steps_per_second": 62.913, "step": 13832 }, { "epoch": 14.17, "learning_rate": 7.165991902834008e-06, "loss": 0.5105, "step": 14000 }, { "epoch": 14.68, "learning_rate": 7.064777327935223e-06, "loss": 0.5224, "step": 14500 }, { "epoch": 15.0, "eval_loss": 2.279872417449951, "eval_mae": 1.1799190044403076, "eval_mse": 2.279872417449951, "eval_rmse": 1.5099246501922607, "eval_runtime": 2.2712, "eval_samples_per_second": 433.694, "eval_steps_per_second": 54.597, "step": 14820 }, { "epoch": 15.18, "learning_rate": 6.963562753036438e-06, "loss": 0.5082, "step": 15000 }, { "epoch": 15.69, "learning_rate": 6.862348178137653e-06, "loss": 0.4868, "step": 15500 }, { "epoch": 16.0, "eval_loss": 2.309877634048462, "eval_mae": 1.1822220087051392, "eval_mse": 2.309877634048462, "eval_rmse": 1.519828200340271, "eval_runtime": 1.9368, "eval_samples_per_second": 508.564, "eval_steps_per_second": 64.022, "step": 15808 }, { "epoch": 16.19, "learning_rate": 6.761133603238867e-06, "loss": 0.5021, "step": 16000 }, { "epoch": 16.7, "learning_rate": 6.6599190283400816e-06, "loss": 0.4702, "step": 16500 }, { "epoch": 17.0, "eval_loss": 2.205157518386841, "eval_mae": 1.1669542789459229, "eval_mse": 2.205157518386841, "eval_rmse": 1.4849772453308105, "eval_runtime": 2.2927, "eval_samples_per_second": 429.627, "eval_steps_per_second": 54.085, "step": 16796 }, { "epoch": 17.21, "learning_rate": 6.558704453441296e-06, "loss": 0.4905, "step": 17000 }, { "epoch": 17.71, "learning_rate": 6.457489878542511e-06, "loss": 0.4665, "step": 17500 }, { "epoch": 18.0, "eval_loss": 2.323201894760132, "eval_mae": 1.1936134099960327, "eval_mse": 2.323201894760132, "eval_rmse": 1.5242053270339966, "eval_runtime": 2.1851, "eval_samples_per_second": 450.772, "eval_steps_per_second": 56.747, "step": 17784 }, { "epoch": 18.22, "learning_rate": 6.356275303643725e-06, "loss": 0.4431, "step": 18000 }, { "epoch": 18.72, "learning_rate": 6.2550607287449395e-06, "loss": 0.483, "step": 18500 }, { "epoch": 19.0, "eval_loss": 2.3569979667663574, "eval_mae": 1.2059428691864014, "eval_mse": 2.3569979667663574, "eval_rmse": 1.5352517366409302, "eval_runtime": 6.1451, "eval_samples_per_second": 160.29, "eval_steps_per_second": 20.179, "step": 18772 }, { "epoch": 19.23, "learning_rate": 6.153846153846155e-06, "loss": 0.4433, "step": 19000 }, { "epoch": 19.74, "learning_rate": 6.0526315789473685e-06, "loss": 0.4561, "step": 19500 }, { "epoch": 20.0, "eval_loss": 2.313786268234253, "eval_mae": 1.190987229347229, "eval_mse": 2.313786268234253, "eval_rmse": 1.5211135149002075, "eval_runtime": 6.1814, "eval_samples_per_second": 159.348, "eval_steps_per_second": 20.06, "step": 19760 }, { "epoch": 20.24, "learning_rate": 5.951417004048583e-06, "loss": 0.4688, "step": 20000 }, { "epoch": 20.75, "learning_rate": 5.850202429149798e-06, "loss": 0.4577, "step": 20500 }, { "epoch": 21.0, "eval_loss": 2.2783560752868652, "eval_mae": 1.1627191305160522, "eval_mse": 2.2783560752868652, "eval_rmse": 1.5094224214553833, "eval_runtime": 6.5272, "eval_samples_per_second": 150.907, "eval_steps_per_second": 18.997, "step": 20748 }, { "epoch": 21.26, "learning_rate": 5.748987854251013e-06, "loss": 0.4261, "step": 21000 }, { "epoch": 21.76, "learning_rate": 5.6477732793522265e-06, "loss": 0.4251, "step": 21500 }, { "epoch": 22.0, "eval_loss": 2.3060412406921387, "eval_mae": 1.176581621170044, "eval_mse": 2.3060412406921387, "eval_rmse": 1.5185655355453491, "eval_runtime": 6.411, "eval_samples_per_second": 153.641, "eval_steps_per_second": 19.342, "step": 21736 }, { "epoch": 22.27, "learning_rate": 5.546558704453442e-06, "loss": 0.4235, "step": 22000 }, { "epoch": 22.77, "learning_rate": 5.445344129554656e-06, "loss": 0.4477, "step": 22500 }, { "epoch": 23.0, "eval_loss": 2.3399007320404053, "eval_mae": 1.1934117078781128, "eval_mse": 2.339900493621826, "eval_rmse": 1.5296733379364014, "eval_runtime": 6.5358, "eval_samples_per_second": 150.708, "eval_steps_per_second": 18.972, "step": 22724 }, { "epoch": 23.28, "learning_rate": 5.344129554655872e-06, "loss": 0.3849, "step": 23000 }, { "epoch": 23.79, "learning_rate": 5.242914979757085e-06, "loss": 0.4274, "step": 23500 }, { "epoch": 24.0, "eval_loss": 2.2604634761810303, "eval_mae": 1.1710097789764404, "eval_mse": 2.2604634761810303, "eval_rmse": 1.503483772277832, "eval_runtime": 6.4581, "eval_samples_per_second": 152.522, "eval_steps_per_second": 19.201, "step": 23712 }, { "epoch": 24.29, "learning_rate": 5.1417004048583e-06, "loss": 0.402, "step": 24000 }, { "epoch": 24.8, "learning_rate": 5.040485829959515e-06, "loss": 0.4176, "step": 24500 }, { "epoch": 25.0, "eval_loss": 2.277299165725708, "eval_mae": 1.1737443208694458, "eval_mse": 2.277299404144287, "eval_rmse": 1.5090724229812622, "eval_runtime": 6.5489, "eval_samples_per_second": 150.407, "eval_steps_per_second": 18.935, "step": 24700 }, { "epoch": 25.3, "learning_rate": 4.939271255060729e-06, "loss": 0.4278, "step": 25000 }, { "epoch": 25.81, "learning_rate": 4.838056680161944e-06, "loss": 0.4105, "step": 25500 }, { "epoch": 26.0, "eval_loss": 2.3140110969543457, "eval_mae": 1.1842072010040283, "eval_mse": 2.3140110969543457, "eval_rmse": 1.521187424659729, "eval_runtime": 6.5703, "eval_samples_per_second": 149.918, "eval_steps_per_second": 18.873, "step": 25688 }, { "epoch": 26.32, "learning_rate": 4.736842105263158e-06, "loss": 0.404, "step": 26000 }, { "epoch": 26.82, "learning_rate": 4.635627530364373e-06, "loss": 0.3843, "step": 26500 }, { "epoch": 27.0, "eval_loss": 2.2196788787841797, "eval_mae": 1.1630769968032837, "eval_mse": 2.2196786403656006, "eval_rmse": 1.489858627319336, "eval_runtime": 4.2474, "eval_samples_per_second": 231.905, "eval_steps_per_second": 29.194, "step": 26676 }, { "epoch": 27.33, "learning_rate": 4.534412955465588e-06, "loss": 0.4395, "step": 27000 }, { "epoch": 27.83, "learning_rate": 4.433198380566802e-06, "loss": 0.3985, "step": 27500 }, { "epoch": 28.0, "eval_loss": 2.2451844215393066, "eval_mae": 1.1653474569320679, "eval_mse": 2.2451844215393066, "eval_rmse": 1.4983938932418823, "eval_runtime": 4.5968, "eval_samples_per_second": 214.279, "eval_steps_per_second": 26.975, "step": 27664 }, { "epoch": 28.34, "learning_rate": 4.3319838056680166e-06, "loss": 0.3633, "step": 28000 }, { "epoch": 28.85, "learning_rate": 4.230769230769231e-06, "loss": 0.4055, "step": 28500 }, { "epoch": 29.0, "eval_loss": 2.1678428649902344, "eval_mae": 1.1424471139907837, "eval_mse": 2.1678428649902344, "eval_rmse": 1.4723596572875977, "eval_runtime": 3.4579, "eval_samples_per_second": 284.854, "eval_steps_per_second": 35.86, "step": 28652 }, { "epoch": 29.35, "learning_rate": 4.1295546558704455e-06, "loss": 0.4101, "step": 29000 }, { "epoch": 29.86, "learning_rate": 4.028340080971661e-06, "loss": 0.3916, "step": 29500 }, { "epoch": 30.0, "eval_loss": 2.2453699111938477, "eval_mae": 1.166300654411316, "eval_mse": 2.2453699111938477, "eval_rmse": 1.4984558820724487, "eval_runtime": 2.9933, "eval_samples_per_second": 329.066, "eval_steps_per_second": 41.426, "step": 29640 }, { "epoch": 30.36, "learning_rate": 3.9271255060728745e-06, "loss": 0.3769, "step": 30000 }, { "epoch": 30.87, "learning_rate": 3.825910931174089e-06, "loss": 0.3905, "step": 30500 }, { "epoch": 31.0, "eval_loss": 2.278787136077881, "eval_mae": 1.170925259590149, "eval_mse": 2.278787136077881, "eval_rmse": 1.5095652341842651, "eval_runtime": 2.1743, "eval_samples_per_second": 453.018, "eval_steps_per_second": 57.03, "step": 30628 }, { "epoch": 31.38, "learning_rate": 3.724696356275304e-06, "loss": 0.3638, "step": 31000 }, { "epoch": 31.88, "learning_rate": 3.6234817813765184e-06, "loss": 0.4033, "step": 31500 }, { "epoch": 32.0, "eval_loss": 2.218869686126709, "eval_mae": 1.1555440425872803, "eval_mse": 2.218869686126709, "eval_rmse": 1.4895870685577393, "eval_runtime": 2.8668, "eval_samples_per_second": 343.59, "eval_steps_per_second": 43.254, "step": 31616 }, { "epoch": 32.39, "learning_rate": 3.522267206477733e-06, "loss": 0.3432, "step": 32000 }, { "epoch": 32.89, "learning_rate": 3.421052631578948e-06, "loss": 0.3603, "step": 32500 }, { "epoch": 33.0, "eval_loss": 2.2483489513397217, "eval_mae": 1.1578136682510376, "eval_mse": 2.2483489513397217, "eval_rmse": 1.4994494915008545, "eval_runtime": 2.7175, "eval_samples_per_second": 362.47, "eval_steps_per_second": 45.631, "step": 32604 }, { "epoch": 33.4, "learning_rate": 3.3198380566801623e-06, "loss": 0.3832, "step": 33000 }, { "epoch": 33.91, "learning_rate": 3.218623481781377e-06, "loss": 0.3955, "step": 33500 }, { "epoch": 34.0, "eval_loss": 2.2034738063812256, "eval_mae": 1.148417353630066, "eval_mse": 2.2034738063812256, "eval_rmse": 1.484410285949707, "eval_runtime": 2.6975, "eval_samples_per_second": 365.15, "eval_steps_per_second": 45.968, "step": 33592 }, { "epoch": 34.41, "learning_rate": 3.1174089068825913e-06, "loss": 0.3517, "step": 34000 }, { "epoch": 34.92, "learning_rate": 3.0161943319838062e-06, "loss": 0.3802, "step": 34500 }, { "epoch": 35.0, "eval_loss": 2.2388076782226562, "eval_mae": 1.1580203771591187, "eval_mse": 2.238807201385498, "eval_rmse": 1.4962644577026367, "eval_runtime": 2.781, "eval_samples_per_second": 354.184, "eval_steps_per_second": 44.588, "step": 34580 }, { "epoch": 35.43, "learning_rate": 2.9149797570850203e-06, "loss": 0.3543, "step": 35000 }, { "epoch": 35.93, "learning_rate": 2.8137651821862348e-06, "loss": 0.3818, "step": 35500 }, { "epoch": 36.0, "eval_loss": 2.3153350353240967, "eval_mae": 1.178168773651123, "eval_mse": 2.3153350353240967, "eval_rmse": 1.5216225385665894, "eval_runtime": 2.7673, "eval_samples_per_second": 355.946, "eval_steps_per_second": 44.81, "step": 35568 }, { "epoch": 36.44, "learning_rate": 2.7125506072874497e-06, "loss": 0.3451, "step": 36000 }, { "epoch": 36.94, "learning_rate": 2.6113360323886646e-06, "loss": 0.3837, "step": 36500 }, { "epoch": 37.0, "eval_loss": 2.2114505767822266, "eval_mae": 1.1500502824783325, "eval_mse": 2.2114510536193848, "eval_rmse": 1.4870948791503906, "eval_runtime": 4.6473, "eval_samples_per_second": 211.95, "eval_steps_per_second": 26.682, "step": 36556 }, { "epoch": 37.45, "learning_rate": 2.5101214574898787e-06, "loss": 0.3335, "step": 37000 }, { "epoch": 37.96, "learning_rate": 2.408906882591093e-06, "loss": 0.3724, "step": 37500 }, { "epoch": 38.0, "eval_loss": 2.190290927886963, "eval_mae": 1.1481947898864746, "eval_mse": 2.190290689468384, "eval_rmse": 1.4799630641937256, "eval_runtime": 4.4593, "eval_samples_per_second": 220.888, "eval_steps_per_second": 27.807, "step": 37544 }, { "epoch": 38.46, "learning_rate": 2.307692307692308e-06, "loss": 0.3631, "step": 38000 }, { "epoch": 38.97, "learning_rate": 2.2064777327935226e-06, "loss": 0.3525, "step": 38500 }, { "epoch": 39.0, "eval_loss": 2.208022117614746, "eval_mae": 1.1508959531784058, "eval_mse": 2.208022117614746, "eval_rmse": 1.4859415292739868, "eval_runtime": 4.5419, "eval_samples_per_second": 216.868, "eval_steps_per_second": 27.301, "step": 38532 }, { "epoch": 39.47, "learning_rate": 2.105263157894737e-06, "loss": 0.3589, "step": 39000 }, { "epoch": 39.98, "learning_rate": 2.0040485829959516e-06, "loss": 0.3477, "step": 39500 }, { "epoch": 40.0, "eval_loss": 2.2591028213500977, "eval_mae": 1.1669268608093262, "eval_mse": 2.2591030597686768, "eval_rmse": 1.5030312538146973, "eval_runtime": 4.628, "eval_samples_per_second": 212.837, "eval_steps_per_second": 26.794, "step": 39520 }, { "epoch": 40.49, "learning_rate": 1.902834008097166e-06, "loss": 0.3239, "step": 40000 }, { "epoch": 40.99, "learning_rate": 1.8016194331983807e-06, "loss": 0.3891, "step": 40500 }, { "epoch": 41.0, "eval_loss": 2.2003180980682373, "eval_mae": 1.1513614654541016, "eval_mse": 2.2003180980682373, "eval_rmse": 1.483346939086914, "eval_runtime": 4.722, "eval_samples_per_second": 208.597, "eval_steps_per_second": 26.26, "step": 40508 }, { "epoch": 41.5, "learning_rate": 1.7004048582995952e-06, "loss": 0.3474, "step": 41000 }, { "epoch": 42.0, "eval_loss": 2.2396390438079834, "eval_mae": 1.1607612371444702, "eval_mse": 2.2396390438079834, "eval_rmse": 1.4965423345565796, "eval_runtime": 4.5927, "eval_samples_per_second": 214.471, "eval_steps_per_second": 26.999, "step": 41496 }, { "epoch": 42.0, "learning_rate": 1.59919028340081e-06, "loss": 0.347, "step": 41500 }, { "epoch": 42.51, "learning_rate": 1.4979757085020244e-06, "loss": 0.3173, "step": 42000 }, { "epoch": 43.0, "eval_loss": 2.2095425128936768, "eval_mae": 1.151437520980835, "eval_mse": 2.2095425128936768, "eval_rmse": 1.4864530563354492, "eval_runtime": 4.6099, "eval_samples_per_second": 213.67, "eval_steps_per_second": 26.899, "step": 42484 }, { "epoch": 43.02, "learning_rate": 1.3967611336032391e-06, "loss": 0.3691, "step": 42500 }, { "epoch": 43.52, "learning_rate": 1.2955465587044536e-06, "loss": 0.335, "step": 43000 }, { "epoch": 44.0, "eval_loss": 2.1894824504852295, "eval_mae": 1.146257758140564, "eval_mse": 2.1894824504852295, "eval_rmse": 1.4796899557113647, "eval_runtime": 4.6282, "eval_samples_per_second": 212.828, "eval_steps_per_second": 26.793, "step": 43472 }, { "epoch": 44.03, "learning_rate": 1.1943319838056681e-06, "loss": 0.3441, "step": 43500 }, { "epoch": 44.53, "learning_rate": 1.0931174089068828e-06, "loss": 0.3443, "step": 44000 }, { "epoch": 45.0, "eval_loss": 2.2246272563934326, "eval_mae": 1.1584477424621582, "eval_mse": 2.2246272563934326, "eval_rmse": 1.491518497467041, "eval_runtime": 4.7245, "eval_samples_per_second": 208.487, "eval_steps_per_second": 26.246, "step": 44460 }, { "epoch": 45.04, "learning_rate": 9.919028340080973e-07, "loss": 0.3242, "step": 44500 }, { "epoch": 45.55, "learning_rate": 8.906882591093118e-07, "loss": 0.3604, "step": 45000 }, { "epoch": 46.0, "eval_loss": 2.2161009311676025, "eval_mae": 1.1535395383834839, "eval_mse": 2.2161009311676025, "eval_rmse": 1.4886574745178223, "eval_runtime": 4.6251, "eval_samples_per_second": 212.97, "eval_steps_per_second": 26.81, "step": 45448 }, { "epoch": 46.05, "learning_rate": 7.894736842105263e-07, "loss": 0.3321, "step": 45500 }, { "epoch": 46.56, "learning_rate": 6.882591093117409e-07, "loss": 0.3422, "step": 46000 }, { "epoch": 47.0, "eval_loss": 2.2106308937072754, "eval_mae": 1.1536109447479248, "eval_mse": 2.2106311321258545, "eval_rmse": 1.4868191480636597, "eval_runtime": 4.7086, "eval_samples_per_second": 209.193, "eval_steps_per_second": 26.335, "step": 46436 }, { "epoch": 47.06, "learning_rate": 5.870445344129555e-07, "loss": 0.3223, "step": 46500 }, { "epoch": 47.57, "learning_rate": 4.858299595141701e-07, "loss": 0.3253, "step": 47000 }, { "epoch": 48.0, "eval_loss": 2.2084391117095947, "eval_mae": 1.1512514352798462, "eval_mse": 2.2084388732910156, "eval_rmse": 1.4860817193984985, "eval_runtime": 4.6784, "eval_samples_per_second": 210.541, "eval_steps_per_second": 26.505, "step": 47424 }, { "epoch": 48.08, "learning_rate": 3.846153846153847e-07, "loss": 0.3418, "step": 47500 }, { "epoch": 48.58, "learning_rate": 2.834008097165992e-07, "loss": 0.3309, "step": 48000 }, { "epoch": 49.0, "eval_loss": 2.2103824615478516, "eval_mae": 1.1521849632263184, "eval_mse": 2.2103824615478516, "eval_rmse": 1.486735463142395, "eval_runtime": 3.2286, "eval_samples_per_second": 305.083, "eval_steps_per_second": 38.406, "step": 48412 }, { "epoch": 49.09, "learning_rate": 1.8218623481781377e-07, "loss": 0.3491, "step": 48500 }, { "epoch": 49.6, "learning_rate": 8.097165991902835e-08, "loss": 0.3148, "step": 49000 }, { "epoch": 50.0, "eval_loss": 2.2166781425476074, "eval_mae": 1.1540616750717163, "eval_mse": 2.2166783809661865, "eval_rmse": 1.4888513088226318, "eval_runtime": 4.5752, "eval_samples_per_second": 215.291, "eval_steps_per_second": 27.103, "step": 49400 } ], "max_steps": 49400, "num_train_epochs": 50, "total_flos": 2.61545223472896e+16, "trial_name": null, "trial_params": null }