|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 30.29538917541504, |
|
"eval_mae": 3.445791244506836, |
|
"eval_mse": 30.29538917541504, |
|
"eval_rmse": 5.504124641418457, |
|
"eval_runtime": 0.1613, |
|
"eval_samples_per_second": 396.717, |
|
"eval_steps_per_second": 49.59, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 22.2762508392334, |
|
"eval_mae": 2.9041895866394043, |
|
"eval_mse": 22.276248931884766, |
|
"eval_rmse": 4.7197723388671875, |
|
"eval_runtime": 0.1462, |
|
"eval_samples_per_second": 437.674, |
|
"eval_steps_per_second": 54.709, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 19.444108963012695, |
|
"eval_mae": 2.726284980773926, |
|
"eval_mse": 19.444108963012695, |
|
"eval_rmse": 4.409547328948975, |
|
"eval_runtime": 0.2087, |
|
"eval_samples_per_second": 306.646, |
|
"eval_steps_per_second": 38.331, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 17.12045669555664, |
|
"eval_mae": 2.5535168647766113, |
|
"eval_mse": 17.12045669555664, |
|
"eval_rmse": 4.1376872062683105, |
|
"eval_runtime": 0.3272, |
|
"eval_samples_per_second": 195.59, |
|
"eval_steps_per_second": 24.449, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 16.316720962524414, |
|
"eval_mae": 2.692227840423584, |
|
"eval_mse": 16.316720962524414, |
|
"eval_rmse": 4.039396286010742, |
|
"eval_runtime": 0.4287, |
|
"eval_samples_per_second": 149.284, |
|
"eval_steps_per_second": 18.661, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 15.293220520019531, |
|
"eval_mae": 2.601778268814087, |
|
"eval_mse": 15.293218612670898, |
|
"eval_rmse": 3.9106545448303223, |
|
"eval_runtime": 0.2956, |
|
"eval_samples_per_second": 216.483, |
|
"eval_steps_per_second": 27.06, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 14.56900691986084, |
|
"eval_mae": 2.5558180809020996, |
|
"eval_mse": 14.569008827209473, |
|
"eval_rmse": 3.816937208175659, |
|
"eval_runtime": 0.4164, |
|
"eval_samples_per_second": 153.681, |
|
"eval_steps_per_second": 19.21, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 8.4375e-06, |
|
"loss": 3.9689, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 13.64460277557373, |
|
"eval_mae": 2.39548397064209, |
|
"eval_mse": 13.64460277557373, |
|
"eval_rmse": 3.6938600540161133, |
|
"eval_runtime": 0.4311, |
|
"eval_samples_per_second": 148.456, |
|
"eval_steps_per_second": 18.557, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 13.105853080749512, |
|
"eval_mae": 2.4013423919677734, |
|
"eval_mse": 13.105853080749512, |
|
"eval_rmse": 3.6202006340026855, |
|
"eval_runtime": 0.4297, |
|
"eval_samples_per_second": 148.951, |
|
"eval_steps_per_second": 18.619, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 13.048660278320312, |
|
"eval_mae": 2.3826305866241455, |
|
"eval_mse": 13.048660278320312, |
|
"eval_rmse": 3.612293004989624, |
|
"eval_runtime": 0.2978, |
|
"eval_samples_per_second": 214.88, |
|
"eval_steps_per_second": 26.86, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 12.558533668518066, |
|
"eval_mae": 2.3683643341064453, |
|
"eval_mse": 12.55853271484375, |
|
"eval_rmse": 3.54380202293396, |
|
"eval_runtime": 0.4369, |
|
"eval_samples_per_second": 146.483, |
|
"eval_steps_per_second": 18.31, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 12.03190803527832, |
|
"eval_mae": 2.2633650302886963, |
|
"eval_mse": 12.03190803527832, |
|
"eval_rmse": 3.4687039852142334, |
|
"eval_runtime": 0.4152, |
|
"eval_samples_per_second": 154.131, |
|
"eval_steps_per_second": 19.266, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 12.05549144744873, |
|
"eval_mae": 2.288928508758545, |
|
"eval_mse": 12.05549144744873, |
|
"eval_rmse": 3.472101926803589, |
|
"eval_runtime": 0.4311, |
|
"eval_samples_per_second": 148.454, |
|
"eval_steps_per_second": 18.557, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 11.724568367004395, |
|
"eval_mae": 2.2129979133605957, |
|
"eval_mse": 11.724568367004395, |
|
"eval_rmse": 3.4241156578063965, |
|
"eval_runtime": 0.4413, |
|
"eval_samples_per_second": 145.038, |
|
"eval_steps_per_second": 18.13, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 11.610068321228027, |
|
"eval_mae": 2.238194227218628, |
|
"eval_mse": 11.610069274902344, |
|
"eval_rmse": 3.4073550701141357, |
|
"eval_runtime": 0.41, |
|
"eval_samples_per_second": 156.087, |
|
"eval_steps_per_second": 19.511, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 6.875e-06, |
|
"loss": 0.8847, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 11.409879684448242, |
|
"eval_mae": 2.171018123626709, |
|
"eval_mse": 11.40987777709961, |
|
"eval_rmse": 3.3778510093688965, |
|
"eval_runtime": 0.4279, |
|
"eval_samples_per_second": 149.579, |
|
"eval_steps_per_second": 18.697, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 11.323513984680176, |
|
"eval_mae": 2.1948208808898926, |
|
"eval_mse": 11.323514938354492, |
|
"eval_rmse": 3.3650431632995605, |
|
"eval_runtime": 0.4359, |
|
"eval_samples_per_second": 146.809, |
|
"eval_steps_per_second": 18.351, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 10.922426223754883, |
|
"eval_mae": 2.1065478324890137, |
|
"eval_mse": 10.9224271774292, |
|
"eval_rmse": 3.3049094676971436, |
|
"eval_runtime": 0.4318, |
|
"eval_samples_per_second": 148.218, |
|
"eval_steps_per_second": 18.527, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 10.802040100097656, |
|
"eval_mae": 2.1256909370422363, |
|
"eval_mse": 10.80203914642334, |
|
"eval_rmse": 3.2866456508636475, |
|
"eval_runtime": 0.2822, |
|
"eval_samples_per_second": 226.81, |
|
"eval_steps_per_second": 28.351, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 10.668560981750488, |
|
"eval_mae": 2.102822780609131, |
|
"eval_mse": 10.668560981750488, |
|
"eval_rmse": 3.2662763595581055, |
|
"eval_runtime": 0.4297, |
|
"eval_samples_per_second": 148.924, |
|
"eval_steps_per_second": 18.616, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 10.928054809570312, |
|
"eval_mae": 2.201329231262207, |
|
"eval_mse": 10.928054809570312, |
|
"eval_rmse": 3.3057608604431152, |
|
"eval_runtime": 0.4133, |
|
"eval_samples_per_second": 154.835, |
|
"eval_steps_per_second": 19.354, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 10.920714378356934, |
|
"eval_mae": 2.1609740257263184, |
|
"eval_mse": 10.920713424682617, |
|
"eval_rmse": 3.30465030670166, |
|
"eval_runtime": 0.4362, |
|
"eval_samples_per_second": 146.723, |
|
"eval_steps_per_second": 18.34, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 10.914674758911133, |
|
"eval_mae": 2.1464970111846924, |
|
"eval_mse": 10.914674758911133, |
|
"eval_rmse": 3.303736448287964, |
|
"eval_runtime": 0.3154, |
|
"eval_samples_per_second": 202.948, |
|
"eval_steps_per_second": 25.369, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"learning_rate": 5.3125e-06, |
|
"loss": 0.6489, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 10.826024055480957, |
|
"eval_mae": 2.166719436645508, |
|
"eval_mse": 10.826024055480957, |
|
"eval_rmse": 3.290292501449585, |
|
"eval_runtime": 0.4282, |
|
"eval_samples_per_second": 149.463, |
|
"eval_steps_per_second": 18.683, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 10.914568901062012, |
|
"eval_mae": 2.1764001846313477, |
|
"eval_mse": 10.914569854736328, |
|
"eval_rmse": 3.303720712661743, |
|
"eval_runtime": 0.4401, |
|
"eval_samples_per_second": 145.428, |
|
"eval_steps_per_second": 18.178, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 11.035606384277344, |
|
"eval_mae": 2.2225704193115234, |
|
"eval_mse": 11.035605430603027, |
|
"eval_rmse": 3.321988105773926, |
|
"eval_runtime": 0.4326, |
|
"eval_samples_per_second": 147.959, |
|
"eval_steps_per_second": 18.495, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 11.038973808288574, |
|
"eval_mae": 2.2142393589019775, |
|
"eval_mse": 11.038971900939941, |
|
"eval_rmse": 3.3224947452545166, |
|
"eval_runtime": 0.435, |
|
"eval_samples_per_second": 147.12, |
|
"eval_steps_per_second": 18.39, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 11.178277015686035, |
|
"eval_mae": 2.2388522624969482, |
|
"eval_mse": 11.178277015686035, |
|
"eval_rmse": 3.343393087387085, |
|
"eval_runtime": 0.4403, |
|
"eval_samples_per_second": 145.349, |
|
"eval_steps_per_second": 18.169, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 10.676009178161621, |
|
"eval_mae": 2.163010358810425, |
|
"eval_mse": 10.676008224487305, |
|
"eval_rmse": 3.26741623878479, |
|
"eval_runtime": 0.4243, |
|
"eval_samples_per_second": 150.831, |
|
"eval_steps_per_second": 18.854, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 10.856060028076172, |
|
"eval_mae": 2.1758837699890137, |
|
"eval_mse": 10.856060981750488, |
|
"eval_rmse": 3.294853687286377, |
|
"eval_runtime": 0.4294, |
|
"eval_samples_per_second": 149.044, |
|
"eval_steps_per_second": 18.63, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 10.435661315917969, |
|
"eval_mae": 2.1428065299987793, |
|
"eval_mse": 10.435661315917969, |
|
"eval_rmse": 3.2304275035858154, |
|
"eval_runtime": 0.4438, |
|
"eval_samples_per_second": 144.195, |
|
"eval_steps_per_second": 18.024, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.5499, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 10.568000793457031, |
|
"eval_mae": 2.1557540893554688, |
|
"eval_mse": 10.568000793457031, |
|
"eval_rmse": 3.2508461475372314, |
|
"eval_runtime": 0.4281, |
|
"eval_samples_per_second": 149.484, |
|
"eval_steps_per_second": 18.686, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 10.61816120147705, |
|
"eval_mae": 2.1454195976257324, |
|
"eval_mse": 10.618160247802734, |
|
"eval_rmse": 3.258551836013794, |
|
"eval_runtime": 0.418, |
|
"eval_samples_per_second": 153.1, |
|
"eval_steps_per_second": 19.137, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 10.70760726928711, |
|
"eval_mae": 2.185828924179077, |
|
"eval_mse": 10.707606315612793, |
|
"eval_rmse": 3.272247791290283, |
|
"eval_runtime": 0.4346, |
|
"eval_samples_per_second": 147.25, |
|
"eval_steps_per_second": 18.406, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 10.65221118927002, |
|
"eval_mae": 2.1475348472595215, |
|
"eval_mse": 10.65221118927002, |
|
"eval_rmse": 3.263772487640381, |
|
"eval_runtime": 0.4365, |
|
"eval_samples_per_second": 146.633, |
|
"eval_steps_per_second": 18.329, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 10.60755443572998, |
|
"eval_mae": 2.1597788333892822, |
|
"eval_mse": 10.607553482055664, |
|
"eval_rmse": 3.2569239139556885, |
|
"eval_runtime": 0.3132, |
|
"eval_samples_per_second": 204.312, |
|
"eval_steps_per_second": 25.539, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 10.558968544006348, |
|
"eval_mae": 2.1506295204162598, |
|
"eval_mse": 10.558967590332031, |
|
"eval_rmse": 3.2494564056396484, |
|
"eval_runtime": 0.4253, |
|
"eval_samples_per_second": 150.493, |
|
"eval_steps_per_second": 18.812, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 10.674619674682617, |
|
"eval_mae": 2.1740453243255615, |
|
"eval_mse": 10.674619674682617, |
|
"eval_rmse": 3.2672035694122314, |
|
"eval_runtime": 0.4355, |
|
"eval_samples_per_second": 146.966, |
|
"eval_steps_per_second": 18.371, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 10.524478912353516, |
|
"eval_mae": 2.1458582878112793, |
|
"eval_mse": 10.5244779586792, |
|
"eval_rmse": 3.244145154953003, |
|
"eval_runtime": 0.4374, |
|
"eval_samples_per_second": 146.309, |
|
"eval_steps_per_second": 18.289, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 39.06, |
|
"learning_rate": 2.1875000000000002e-06, |
|
"loss": 0.5012, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 10.667997360229492, |
|
"eval_mae": 2.155714273452759, |
|
"eval_mse": 10.667997360229492, |
|
"eval_rmse": 3.2661900520324707, |
|
"eval_runtime": 0.3418, |
|
"eval_samples_per_second": 187.219, |
|
"eval_steps_per_second": 23.402, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 10.641304016113281, |
|
"eval_mae": 2.174771547317505, |
|
"eval_mse": 10.641304016113281, |
|
"eval_rmse": 3.262101173400879, |
|
"eval_runtime": 0.4419, |
|
"eval_samples_per_second": 144.834, |
|
"eval_steps_per_second": 18.104, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 10.72380542755127, |
|
"eval_mae": 2.2047784328460693, |
|
"eval_mse": 10.723804473876953, |
|
"eval_rmse": 3.274722099304199, |
|
"eval_runtime": 0.4025, |
|
"eval_samples_per_second": 158.999, |
|
"eval_steps_per_second": 19.875, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 10.794100761413574, |
|
"eval_mae": 2.212890863418579, |
|
"eval_mse": 10.79410171508789, |
|
"eval_rmse": 3.285437822341919, |
|
"eval_runtime": 0.4371, |
|
"eval_samples_per_second": 146.435, |
|
"eval_steps_per_second": 18.304, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 10.679245948791504, |
|
"eval_mae": 2.199676513671875, |
|
"eval_mse": 10.679245948791504, |
|
"eval_rmse": 3.267911672592163, |
|
"eval_runtime": 0.4079, |
|
"eval_samples_per_second": 156.894, |
|
"eval_steps_per_second": 19.612, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 10.524163246154785, |
|
"eval_mae": 2.1674587726593018, |
|
"eval_mse": 10.524163246154785, |
|
"eval_rmse": 3.2440967559814453, |
|
"eval_runtime": 0.2984, |
|
"eval_samples_per_second": 214.505, |
|
"eval_steps_per_second": 26.813, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 10.532914161682129, |
|
"eval_mae": 2.177356481552124, |
|
"eval_mse": 10.532913208007812, |
|
"eval_rmse": 3.2454450130462646, |
|
"eval_runtime": 0.4463, |
|
"eval_samples_per_second": 143.398, |
|
"eval_steps_per_second": 17.925, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 6.25e-07, |
|
"loss": 0.471, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 10.552495002746582, |
|
"eval_mae": 2.182786464691162, |
|
"eval_mse": 10.552494049072266, |
|
"eval_rmse": 3.248460292816162, |
|
"eval_runtime": 0.3828, |
|
"eval_samples_per_second": 167.191, |
|
"eval_steps_per_second": 20.899, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 10.577668190002441, |
|
"eval_mae": 2.177499294281006, |
|
"eval_mse": 10.577667236328125, |
|
"eval_rmse": 3.2523326873779297, |
|
"eval_runtime": 0.4436, |
|
"eval_samples_per_second": 144.276, |
|
"eval_steps_per_second": 18.035, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 10.599947929382324, |
|
"eval_mae": 2.184239625930786, |
|
"eval_mse": 10.59994888305664, |
|
"eval_rmse": 3.255756378173828, |
|
"eval_runtime": 0.2975, |
|
"eval_samples_per_second": 215.1, |
|
"eval_steps_per_second": 26.888, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 10.600202560424805, |
|
"eval_mae": 2.1849870681762695, |
|
"eval_mse": 10.600202560424805, |
|
"eval_rmse": 3.2557952404022217, |
|
"eval_runtime": 0.4398, |
|
"eval_samples_per_second": 145.517, |
|
"eval_steps_per_second": 18.19, |
|
"step": 3200 |
|
} |
|
], |
|
"max_steps": 3200, |
|
"num_train_epochs": 50, |
|
"total_flos": 1692195608947200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|