{
  "best_metric": 6389.54150390625,
  "best_model_checkpoint": "./coco_outputs/checkpoint-196",
  "epoch": 50.0,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 6398.52734375,
      "eval_runtime": 0.5289,
      "eval_samples_per_second": 15.126,
      "eval_steps_per_second": 1.891,
      "step": 4
    },
    {
      "epoch": 2.0,
      "eval_loss": 6397.9267578125,
      "eval_runtime": 0.5329,
      "eval_samples_per_second": 15.013,
      "eval_steps_per_second": 1.877,
      "step": 8
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.9e-05,
      "loss": 6175.8695,
      "step": 10
    },
    {
      "epoch": 3.0,
      "eval_loss": 6397.248046875,
      "eval_runtime": 0.5325,
      "eval_samples_per_second": 15.023,
      "eval_steps_per_second": 1.878,
      "step": 12
    },
    {
      "epoch": 4.0,
      "eval_loss": 6396.56494140625,
      "eval_runtime": 0.5866,
      "eval_samples_per_second": 13.638,
      "eval_steps_per_second": 1.705,
      "step": 16
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.8e-05,
      "loss": 6292.1676,
      "step": 20
    },
    {
      "epoch": 5.0,
      "eval_loss": 6396.06689453125,
      "eval_runtime": 0.5343,
      "eval_samples_per_second": 14.974,
      "eval_steps_per_second": 1.872,
      "step": 20
    },
    {
      "epoch": 6.0,
      "eval_loss": 6395.65966796875,
      "eval_runtime": 0.5271,
      "eval_samples_per_second": 15.178,
      "eval_steps_per_second": 1.897,
      "step": 24
    },
    {
      "epoch": 7.0,
      "eval_loss": 6395.27392578125,
      "eval_runtime": 0.5241,
      "eval_samples_per_second": 15.264,
      "eval_steps_per_second": 1.908,
      "step": 28
    },
    {
      "epoch": 7.5,
      "learning_rate": 1.7e-05,
      "loss": 6230.1191,
      "step": 30
    },
    {
      "epoch": 8.0,
      "eval_loss": 6394.890625,
      "eval_runtime": 0.5365,
      "eval_samples_per_second": 14.911,
      "eval_steps_per_second": 1.864,
      "step": 32
    },
    {
      "epoch": 9.0,
      "eval_loss": 6394.66796875,
      "eval_runtime": 0.5263,
      "eval_samples_per_second": 15.201,
      "eval_steps_per_second": 1.9,
      "step": 36
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 6089.0629,
      "step": 40
    },
    {
      "epoch": 10.0,
      "eval_loss": 6394.51513671875,
      "eval_runtime": 0.5595,
      "eval_samples_per_second": 14.298,
      "eval_steps_per_second": 1.787,
      "step": 40
    },
    {
      "epoch": 11.0,
      "eval_loss": 6394.369140625,
      "eval_runtime": 0.5347,
      "eval_samples_per_second": 14.961,
      "eval_steps_per_second": 1.87,
      "step": 44
    },
    {
      "epoch": 12.0,
      "eval_loss": 6394.39306640625,
      "eval_runtime": 0.6066,
      "eval_samples_per_second": 13.188,
      "eval_steps_per_second": 1.648,
      "step": 48
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 6266.6961,
      "step": 50
    },
    {
      "epoch": 13.0,
      "eval_loss": 6394.3232421875,
      "eval_runtime": 0.5315,
      "eval_samples_per_second": 15.053,
      "eval_steps_per_second": 1.882,
      "step": 52
    },
    {
      "epoch": 14.0,
      "eval_loss": 6394.1572265625,
      "eval_runtime": 0.5445,
      "eval_samples_per_second": 14.694,
      "eval_steps_per_second": 1.837,
      "step": 56
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.4e-05,
      "loss": 6383.0688,
      "step": 60
    },
    {
      "epoch": 15.0,
      "eval_loss": 6394.15576171875,
      "eval_runtime": 0.5377,
      "eval_samples_per_second": 14.877,
      "eval_steps_per_second": 1.86,
      "step": 60
    },
    {
      "epoch": 16.0,
      "eval_loss": 6393.953125,
      "eval_runtime": 0.5413,
      "eval_samples_per_second": 14.779,
      "eval_steps_per_second": 1.847,
      "step": 64
    },
    {
      "epoch": 17.0,
      "eval_loss": 6393.77197265625,
      "eval_runtime": 0.5527,
      "eval_samples_per_second": 14.475,
      "eval_steps_per_second": 1.809,
      "step": 68
    },
    {
      "epoch": 17.5,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 6323.732,
      "step": 70
    },
    {
      "epoch": 18.0,
      "eval_loss": 6393.60498046875,
      "eval_runtime": 0.5502,
      "eval_samples_per_second": 14.54,
      "eval_steps_per_second": 1.817,
      "step": 72
    },
    {
      "epoch": 19.0,
      "eval_loss": 6393.5107421875,
      "eval_runtime": 0.5415,
      "eval_samples_per_second": 14.774,
      "eval_steps_per_second": 1.847,
      "step": 76
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.2e-05,
      "loss": 6262.8367,
      "step": 80
    },
    {
      "epoch": 20.0,
      "eval_loss": 6393.23046875,
      "eval_runtime": 0.5438,
      "eval_samples_per_second": 14.71,
      "eval_steps_per_second": 1.839,
      "step": 80
    },
    {
      "epoch": 21.0,
      "eval_loss": 6393.0869140625,
      "eval_runtime": 0.5423,
      "eval_samples_per_second": 14.753,
      "eval_steps_per_second": 1.844,
      "step": 84
    },
    {
      "epoch": 22.0,
      "eval_loss": 6392.791015625,
      "eval_runtime": 0.5523,
      "eval_samples_per_second": 14.486,
      "eval_steps_per_second": 1.811,
      "step": 88
    },
    {
      "epoch": 22.5,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 6294.5953,
      "step": 90
    },
    {
      "epoch": 23.0,
      "eval_loss": 6392.60791015625,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 14.407,
      "eval_steps_per_second": 1.801,
      "step": 92
    },
    {
      "epoch": 24.0,
      "eval_loss": 6392.46337890625,
      "eval_runtime": 0.542,
      "eval_samples_per_second": 14.759,
      "eval_steps_per_second": 1.845,
      "step": 96
    },
    {
      "epoch": 25.0,
      "learning_rate": 1e-05,
      "loss": 6454.1902,
      "step": 100
    },
    {
      "epoch": 25.0,
      "eval_loss": 6392.30859375,
      "eval_runtime": 0.5462,
      "eval_samples_per_second": 14.647,
      "eval_steps_per_second": 1.831,
      "step": 100
    },
    {
      "epoch": 26.0,
      "eval_loss": 6392.1982421875,
      "eval_runtime": 0.5465,
      "eval_samples_per_second": 14.64,
      "eval_steps_per_second": 1.83,
      "step": 104
    },
    {
      "epoch": 27.0,
      "eval_loss": 6391.966796875,
      "eval_runtime": 0.5565,
      "eval_samples_per_second": 14.375,
      "eval_steps_per_second": 1.797,
      "step": 108
    },
    {
      "epoch": 27.5,
      "learning_rate": 9e-06,
      "loss": 6325.2328,
      "step": 110
    },
    {
      "epoch": 28.0,
      "eval_loss": 6391.6533203125,
      "eval_runtime": 0.5394,
      "eval_samples_per_second": 14.833,
      "eval_steps_per_second": 1.854,
      "step": 112
    },
    {
      "epoch": 29.0,
      "eval_loss": 6391.34814453125,
      "eval_runtime": 0.5379,
      "eval_samples_per_second": 14.872,
      "eval_steps_per_second": 1.859,
      "step": 116
    },
    {
      "epoch": 30.0,
      "learning_rate": 8.000000000000001e-06,
      "loss": 6471.3637,
      "step": 120
    },
    {
      "epoch": 30.0,
      "eval_loss": 6391.04736328125,
      "eval_runtime": 0.5998,
      "eval_samples_per_second": 13.338,
      "eval_steps_per_second": 1.667,
      "step": 120
    },
    {
      "epoch": 31.0,
      "eval_loss": 6390.94091796875,
      "eval_runtime": 0.5467,
      "eval_samples_per_second": 14.632,
      "eval_steps_per_second": 1.829,
      "step": 124
    },
    {
      "epoch": 32.0,
      "eval_loss": 6390.81982421875,
      "eval_runtime": 0.5421,
      "eval_samples_per_second": 14.759,
      "eval_steps_per_second": 1.845,
      "step": 128
    },
    {
      "epoch": 32.5,
      "learning_rate": 7e-06,
      "loss": 6308.2656,
      "step": 130
    },
    {
      "epoch": 33.0,
      "eval_loss": 6390.673828125,
      "eval_runtime": 0.5411,
      "eval_samples_per_second": 14.785,
      "eval_steps_per_second": 1.848,
      "step": 132
    },
    {
      "epoch": 34.0,
      "eval_loss": 6390.6689453125,
      "eval_runtime": 0.5495,
      "eval_samples_per_second": 14.56,
      "eval_steps_per_second": 1.82,
      "step": 136
    },
    {
      "epoch": 35.0,
      "learning_rate": 6e-06,
      "loss": 6352.0371,
      "step": 140
    },
    {
      "epoch": 35.0,
      "eval_loss": 6390.62451171875,
      "eval_runtime": 0.5561,
      "eval_samples_per_second": 14.385,
      "eval_steps_per_second": 1.798,
      "step": 140
    },
    {
      "epoch": 36.0,
      "eval_loss": 6390.5791015625,
      "eval_runtime": 0.5601,
      "eval_samples_per_second": 14.282,
      "eval_steps_per_second": 1.785,
      "step": 144
    },
    {
      "epoch": 37.0,
      "eval_loss": 6390.419921875,
      "eval_runtime": 0.5447,
      "eval_samples_per_second": 14.686,
      "eval_steps_per_second": 1.836,
      "step": 148
    },
    {
      "epoch": 37.5,
      "learning_rate": 5e-06,
      "loss": 6247.1492,
      "step": 150
    },
    {
      "epoch": 38.0,
      "eval_loss": 6390.23046875,
      "eval_runtime": 0.5405,
      "eval_samples_per_second": 14.8,
      "eval_steps_per_second": 1.85,
      "step": 152
    },
    {
      "epoch": 39.0,
      "eval_loss": 6390.07763671875,
      "eval_runtime": 0.5527,
      "eval_samples_per_second": 14.473,
      "eval_steps_per_second": 1.809,
      "step": 156
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6385.748,
      "step": 160
    },
    {
      "epoch": 40.0,
      "eval_loss": 6389.9873046875,
      "eval_runtime": 0.5425,
      "eval_samples_per_second": 14.746,
      "eval_steps_per_second": 1.843,
      "step": 160
    },
    {
      "epoch": 41.0,
      "eval_loss": 6389.91845703125,
      "eval_runtime": 0.5483,
      "eval_samples_per_second": 14.591,
      "eval_steps_per_second": 1.824,
      "step": 164
    },
    {
      "epoch": 42.0,
      "eval_loss": 6389.828125,
      "eval_runtime": 0.5625,
      "eval_samples_per_second": 14.221,
      "eval_steps_per_second": 1.778,
      "step": 168
    },
    {
      "epoch": 42.5,
      "learning_rate": 3e-06,
      "loss": 6262.0703,
      "step": 170
    },
    {
      "epoch": 43.0,
      "eval_loss": 6389.75,
      "eval_runtime": 0.5518,
      "eval_samples_per_second": 14.498,
      "eval_steps_per_second": 1.812,
      "step": 172
    },
    {
      "epoch": 44.0,
      "eval_loss": 6389.67919921875,
      "eval_runtime": 0.5455,
      "eval_samples_per_second": 14.666,
      "eval_steps_per_second": 1.833,
      "step": 176
    },
    {
      "epoch": 45.0,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 6256.2105,
      "step": 180
    },
    {
      "epoch": 45.0,
      "eval_loss": 6389.6376953125,
      "eval_runtime": 0.5593,
      "eval_samples_per_second": 14.304,
      "eval_steps_per_second": 1.788,
      "step": 180
    },
    {
      "epoch": 46.0,
      "eval_loss": 6389.6201171875,
      "eval_runtime": 0.555,
      "eval_samples_per_second": 14.413,
      "eval_steps_per_second": 1.802,
      "step": 184
    },
    {
      "epoch": 47.0,
      "eval_loss": 6389.57275390625,
      "eval_runtime": 0.5436,
      "eval_samples_per_second": 14.718,
      "eval_steps_per_second": 1.84,
      "step": 188
    },
    {
      "epoch": 47.5,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 6353.4496,
      "step": 190
    },
    {
      "epoch": 48.0,
      "eval_loss": 6389.5517578125,
      "eval_runtime": 0.55,
      "eval_samples_per_second": 14.545,
      "eval_steps_per_second": 1.818,
      "step": 192
    },
    {
      "epoch": 49.0,
      "eval_loss": 6389.54150390625,
      "eval_runtime": 0.5412,
      "eval_samples_per_second": 14.782,
      "eval_steps_per_second": 1.848,
      "step": 196
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0,
      "loss": 6211.1777,
      "step": 200
    },
    {
      "epoch": 50.0,
      "eval_loss": 6389.552734375,
      "eval_runtime": 0.5488,
      "eval_samples_per_second": 14.577,
      "eval_steps_per_second": 1.822,
      "step": 200
    },
    {
      "epoch": 50.0,
      "step": 200,
      "total_flos": 7.65002115072e+17,
      "train_loss": 6297.2521484375,
      "train_runtime": 886.6987,
      "train_samples_per_second": 1.804,
      "train_steps_per_second": 0.226
    }
  ],
  "max_steps": 200,
  "num_train_epochs": 50,
  "total_flos": 7.65002115072e+17,
  "trial_name": null,
  "trial_params": null
}