|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 35.026963262554766, |
|
"global_step": 12960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8e-05, |
|
"loss": 1.9241, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00016, |
|
"loss": 1.8026, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.7006735801696777, |
|
"eval_runtime": 120.5725, |
|
"eval_samples_per_second": 46.437, |
|
"eval_steps_per_second": 0.73, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7588, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7242, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 1.6368365287780762, |
|
"eval_runtime": 123.5326, |
|
"eval_samples_per_second": 45.324, |
|
"eval_steps_per_second": 0.712, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6797, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6544, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 1.589858889579773, |
|
"eval_runtime": 121.9204, |
|
"eval_samples_per_second": 45.923, |
|
"eval_steps_per_second": 0.722, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.639, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6103, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 1.559193730354309, |
|
"eval_runtime": 118.3836, |
|
"eval_samples_per_second": 47.295, |
|
"eval_steps_per_second": 0.743, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5982, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5858, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 1.5362491607666016, |
|
"eval_runtime": 123.4422, |
|
"eval_samples_per_second": 45.357, |
|
"eval_steps_per_second": 0.713, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5684, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5566, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 1.51528799533844, |
|
"eval_runtime": 120.2858, |
|
"eval_samples_per_second": 46.547, |
|
"eval_steps_per_second": 0.732, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5593, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5322, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 1.5114836692810059, |
|
"eval_runtime": 117.8482, |
|
"eval_samples_per_second": 47.51, |
|
"eval_steps_per_second": 0.747, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5285, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5359, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 1.48625910282135, |
|
"eval_runtime": 123.7493, |
|
"eval_samples_per_second": 45.245, |
|
"eval_steps_per_second": 0.711, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5207, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5079, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 1.4822603464126587, |
|
"eval_runtime": 119.2091, |
|
"eval_samples_per_second": 46.968, |
|
"eval_steps_per_second": 0.738, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0002, |
|
"loss": 1.51, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4909, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 1.4646539688110352, |
|
"eval_runtime": 122.8427, |
|
"eval_samples_per_second": 45.579, |
|
"eval_steps_per_second": 0.716, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4869, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4894, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"eval_loss": 1.4567737579345703, |
|
"eval_runtime": 112.4698, |
|
"eval_samples_per_second": 49.782, |
|
"eval_steps_per_second": 0.782, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4705, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0002, |
|
"loss": 1.469, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 1.447322130203247, |
|
"eval_runtime": 124.434, |
|
"eval_samples_per_second": 44.996, |
|
"eval_steps_per_second": 0.707, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4525, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"eval_loss": 1.4480490684509277, |
|
"eval_runtime": 120.9825, |
|
"eval_samples_per_second": 46.279, |
|
"eval_steps_per_second": 0.727, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.452, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4552, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_loss": 1.4297771453857422, |
|
"eval_runtime": 119.4349, |
|
"eval_samples_per_second": 46.879, |
|
"eval_steps_per_second": 0.737, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4369, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4357, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 1.4253787994384766, |
|
"eval_runtime": 123.7286, |
|
"eval_samples_per_second": 45.252, |
|
"eval_steps_per_second": 0.711, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4449, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4245, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 1.419893741607666, |
|
"eval_runtime": 122.5962, |
|
"eval_samples_per_second": 45.67, |
|
"eval_steps_per_second": 0.718, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4259, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4317, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"eval_loss": 1.4151264429092407, |
|
"eval_runtime": 120.6018, |
|
"eval_samples_per_second": 46.426, |
|
"eval_steps_per_second": 0.73, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4133, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4119, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"eval_loss": 1.4069455862045288, |
|
"eval_runtime": 123.9031, |
|
"eval_samples_per_second": 45.189, |
|
"eval_steps_per_second": 0.71, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4096, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4086, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"eval_loss": 1.4099173545837402, |
|
"eval_runtime": 121.1011, |
|
"eval_samples_per_second": 46.234, |
|
"eval_steps_per_second": 0.727, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4031, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.401, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"eval_loss": 1.4046831130981445, |
|
"eval_runtime": 121.8177, |
|
"eval_samples_per_second": 45.962, |
|
"eval_steps_per_second": 0.722, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4031, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.394, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"eval_loss": 1.401537299156189, |
|
"eval_runtime": 121.4356, |
|
"eval_samples_per_second": 46.107, |
|
"eval_steps_per_second": 0.725, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3922, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3945, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"eval_loss": 1.3918230533599854, |
|
"eval_runtime": 119.2233, |
|
"eval_samples_per_second": 46.962, |
|
"eval_steps_per_second": 0.738, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3836, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3838, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 1.385350227355957, |
|
"eval_runtime": 113.4489, |
|
"eval_samples_per_second": 49.353, |
|
"eval_steps_per_second": 0.776, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.387, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3722, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 1.379088282585144, |
|
"eval_runtime": 116.4932, |
|
"eval_samples_per_second": 48.063, |
|
"eval_steps_per_second": 0.755, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3757, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3775, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"eval_loss": 1.384007453918457, |
|
"eval_runtime": 115.8099, |
|
"eval_samples_per_second": 48.346, |
|
"eval_steps_per_second": 0.76, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3683, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3675, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"eval_loss": 1.3760778903961182, |
|
"eval_runtime": 113.2638, |
|
"eval_samples_per_second": 49.433, |
|
"eval_steps_per_second": 0.777, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.375, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.358, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"eval_loss": 1.3729970455169678, |
|
"eval_runtime": 119.1962, |
|
"eval_samples_per_second": 46.973, |
|
"eval_steps_per_second": 0.738, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3617, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3679, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"eval_loss": 1.3826600313186646, |
|
"eval_runtime": 118.9849, |
|
"eval_samples_per_second": 47.056, |
|
"eval_steps_per_second": 0.74, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3592, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3602, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"eval_loss": 1.3659363985061646, |
|
"eval_runtime": 120.7081, |
|
"eval_samples_per_second": 46.385, |
|
"eval_steps_per_second": 0.729, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3633, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3522, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"eval_loss": 1.372406244277954, |
|
"eval_runtime": 113.6178, |
|
"eval_samples_per_second": 49.279, |
|
"eval_steps_per_second": 0.775, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.0002, |
|
"loss": 1.345, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3555, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"eval_loss": 1.368371844291687, |
|
"eval_runtime": 118.9369, |
|
"eval_samples_per_second": 47.075, |
|
"eval_steps_per_second": 0.74, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3396, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3536, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_loss": 1.3611598014831543, |
|
"eval_runtime": 119.3386, |
|
"eval_samples_per_second": 46.917, |
|
"eval_steps_per_second": 0.737, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3506, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3347, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"eval_loss": 1.3598804473876953, |
|
"eval_runtime": 114.0961, |
|
"eval_samples_per_second": 49.073, |
|
"eval_steps_per_second": 0.771, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.338, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3463, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"eval_loss": 1.3614617586135864, |
|
"eval_runtime": 121.7757, |
|
"eval_samples_per_second": 45.978, |
|
"eval_steps_per_second": 0.723, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 22.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3305, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3296, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"eval_loss": 1.359055519104004, |
|
"eval_runtime": 113.3148, |
|
"eval_samples_per_second": 49.411, |
|
"eval_steps_per_second": 0.777, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 0.0002, |
|
"loss": 1.344, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3201, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"eval_loss": 1.358960509300232, |
|
"eval_runtime": 122.2886, |
|
"eval_samples_per_second": 45.785, |
|
"eval_steps_per_second": 0.72, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3302, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3292, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.3509206771850586, |
|
"eval_runtime": 99.6058, |
|
"eval_samples_per_second": 56.212, |
|
"eval_steps_per_second": 0.883, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3294, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3207, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"eval_loss": 1.357851505279541, |
|
"eval_runtime": 105.9073, |
|
"eval_samples_per_second": 52.867, |
|
"eval_steps_per_second": 0.831, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 24.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3215, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3231, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"eval_loss": 1.3393853902816772, |
|
"eval_runtime": 99.7219, |
|
"eval_samples_per_second": 56.146, |
|
"eval_steps_per_second": 0.882, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3121, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3176, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_loss": 1.3441215753555298, |
|
"eval_runtime": 101.3937, |
|
"eval_samples_per_second": 55.22, |
|
"eval_steps_per_second": 0.868, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3188, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 26.59, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3103, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 26.59, |
|
"eval_loss": 1.3429008722305298, |
|
"eval_runtime": 100.8116, |
|
"eval_samples_per_second": 55.539, |
|
"eval_steps_per_second": 0.873, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.313, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3156, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"eval_loss": 1.3400343656539917, |
|
"eval_runtime": 98.2948, |
|
"eval_samples_per_second": 56.961, |
|
"eval_steps_per_second": 0.895, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3064, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.306, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"eval_loss": 1.339460015296936, |
|
"eval_runtime": 97.8707, |
|
"eval_samples_per_second": 57.208, |
|
"eval_steps_per_second": 0.899, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3093, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 28.54, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3026, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 28.54, |
|
"eval_loss": 1.3380861282348633, |
|
"eval_runtime": 99.7827, |
|
"eval_samples_per_second": 56.112, |
|
"eval_steps_per_second": 0.882, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3014, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 29.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3093, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 29.19, |
|
"eval_loss": 1.335351824760437, |
|
"eval_runtime": 99.7514, |
|
"eval_samples_per_second": 56.13, |
|
"eval_steps_per_second": 0.882, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2954, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 29.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2982, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 29.84, |
|
"eval_loss": 1.33037269115448, |
|
"eval_runtime": 111.392, |
|
"eval_samples_per_second": 50.264, |
|
"eval_steps_per_second": 0.79, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3032, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2927, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"eval_loss": 1.3423055410385132, |
|
"eval_runtime": 110.815, |
|
"eval_samples_per_second": 50.526, |
|
"eval_steps_per_second": 0.794, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2968, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3003, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_loss": 1.3345474004745483, |
|
"eval_runtime": 100.6956, |
|
"eval_samples_per_second": 55.603, |
|
"eval_steps_per_second": 0.874, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 31.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2865, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2928, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"eval_loss": 1.337437629699707, |
|
"eval_runtime": 97.2235, |
|
"eval_samples_per_second": 57.589, |
|
"eval_steps_per_second": 0.905, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 32.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2981, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2847, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"eval_loss": 1.3236644268035889, |
|
"eval_runtime": 97.4026, |
|
"eval_samples_per_second": 57.483, |
|
"eval_steps_per_second": 0.903, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 32.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2871, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 33.08, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2966, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 33.08, |
|
"eval_loss": 1.332656741142273, |
|
"eval_runtime": 97.3643, |
|
"eval_samples_per_second": 57.506, |
|
"eval_steps_per_second": 0.904, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2789, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2829, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"eval_loss": 1.3252918720245361, |
|
"eval_runtime": 104.7279, |
|
"eval_samples_per_second": 53.462, |
|
"eval_steps_per_second": 0.84, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 34.05, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2926, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 34.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2756, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 34.38, |
|
"eval_loss": 1.326663613319397, |
|
"eval_runtime": 98.2526, |
|
"eval_samples_per_second": 56.986, |
|
"eval_steps_per_second": 0.896, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 34.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2801, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2919, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"eval_loss": 1.3183717727661133, |
|
"eval_runtime": 99.1376, |
|
"eval_samples_per_second": 56.477, |
|
"eval_steps_per_second": 0.888, |
|
"step": 12960 |
|
} |
|
], |
|
"max_steps": 14000, |
|
"num_train_epochs": 38, |
|
"total_flos": 1.7505797492048026e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|