{
  "best_metric": 6388.880859375,
  "best_model_checkpoint": "./coco_outputs/checkpoint-200",
  "epoch": 50.0,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 6398.4208984375,
      "eval_runtime": 0.5249,
      "eval_samples_per_second": 15.242,
      "eval_steps_per_second": 1.905,
      "step": 4
    },
    {
      "epoch": 2.0,
      "eval_loss": 6397.8583984375,
      "eval_runtime": 0.5335,
      "eval_samples_per_second": 14.995,
      "eval_steps_per_second": 1.874,
      "step": 8
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.9e-05,
      "loss": 6370.2805,
      "step": 10
    },
    {
      "epoch": 3.0,
      "eval_loss": 6397.47900390625,
      "eval_runtime": 0.5389,
      "eval_samples_per_second": 14.844,
      "eval_steps_per_second": 1.855,
      "step": 12
    },
    {
      "epoch": 4.0,
      "eval_loss": 6396.83203125,
      "eval_runtime": 0.5405,
      "eval_samples_per_second": 14.802,
      "eval_steps_per_second": 1.85,
      "step": 16
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.8e-05,
      "loss": 6424.3547,
      "step": 20
    },
    {
      "epoch": 5.0,
      "eval_loss": 6396.29296875,
      "eval_runtime": 0.5443,
      "eval_samples_per_second": 14.697,
      "eval_steps_per_second": 1.837,
      "step": 20
    },
    {
      "epoch": 6.0,
      "eval_loss": 6395.72314453125,
      "eval_runtime": 0.5415,
      "eval_samples_per_second": 14.775,
      "eval_steps_per_second": 1.847,
      "step": 24
    },
    {
      "epoch": 7.0,
      "eval_loss": 6395.1376953125,
      "eval_runtime": 0.5498,
      "eval_samples_per_second": 14.55,
      "eval_steps_per_second": 1.819,
      "step": 28
    },
    {
      "epoch": 7.5,
      "learning_rate": 1.7e-05,
      "loss": 6477.4051,
      "step": 30
    },
    {
      "epoch": 8.0,
      "eval_loss": 6394.81884765625,
      "eval_runtime": 0.5491,
      "eval_samples_per_second": 14.568,
      "eval_steps_per_second": 1.821,
      "step": 32
    },
    {
      "epoch": 9.0,
      "eval_loss": 6394.478515625,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 14.407,
      "eval_steps_per_second": 1.801,
      "step": 36
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 6381.9848,
      "step": 40
    },
    {
      "epoch": 10.0,
      "eval_loss": 6394.22119140625,
      "eval_runtime": 0.5529,
      "eval_samples_per_second": 14.469,
      "eval_steps_per_second": 1.809,
      "step": 40
    },
    {
      "epoch": 11.0,
      "eval_loss": 6394.06640625,
      "eval_runtime": 0.5511,
      "eval_samples_per_second": 14.516,
      "eval_steps_per_second": 1.814,
      "step": 44
    },
    {
      "epoch": 12.0,
      "eval_loss": 6393.92529296875,
      "eval_runtime": 0.5545,
      "eval_samples_per_second": 14.426,
      "eval_steps_per_second": 1.803,
      "step": 48
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 6343.784,
      "step": 50
    },
    {
      "epoch": 13.0,
      "eval_loss": 6393.734375,
      "eval_runtime": 0.552,
      "eval_samples_per_second": 14.494,
      "eval_steps_per_second": 1.812,
      "step": 52
    },
    {
      "epoch": 14.0,
      "eval_loss": 6393.61279296875,
      "eval_runtime": 0.5519,
      "eval_samples_per_second": 14.496,
      "eval_steps_per_second": 1.812,
      "step": 56
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.4e-05,
      "loss": 6458.8668,
      "step": 60
    },
    {
      "epoch": 15.0,
      "eval_loss": 6393.43798828125,
      "eval_runtime": 0.5498,
      "eval_samples_per_second": 14.551,
      "eval_steps_per_second": 1.819,
      "step": 60
    },
    {
      "epoch": 16.0,
      "eval_loss": 6393.2392578125,
      "eval_runtime": 0.5467,
      "eval_samples_per_second": 14.633,
      "eval_steps_per_second": 1.829,
      "step": 64
    },
    {
      "epoch": 17.0,
      "eval_loss": 6393.10400390625,
      "eval_runtime": 0.546,
      "eval_samples_per_second": 14.651,
      "eval_steps_per_second": 1.831,
      "step": 68
    },
    {
      "epoch": 17.5,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 6414.077,
      "step": 70
    },
    {
      "epoch": 18.0,
      "eval_loss": 6392.95751953125,
      "eval_runtime": 0.5445,
      "eval_samples_per_second": 14.693,
      "eval_steps_per_second": 1.837,
      "step": 72
    },
    {
      "epoch": 19.0,
      "eval_loss": 6392.830078125,
      "eval_runtime": 0.553,
      "eval_samples_per_second": 14.467,
      "eval_steps_per_second": 1.808,
      "step": 76
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.2e-05,
      "loss": 6417.8516,
      "step": 80
    },
    {
      "epoch": 20.0,
      "eval_loss": 6392.3056640625,
      "eval_runtime": 0.5433,
      "eval_samples_per_second": 14.725,
      "eval_steps_per_second": 1.841,
      "step": 80
    },
    {
      "epoch": 21.0,
      "eval_loss": 6391.8310546875,
      "eval_runtime": 0.5461,
      "eval_samples_per_second": 14.65,
      "eval_steps_per_second": 1.831,
      "step": 84
    },
    {
      "epoch": 22.0,
      "eval_loss": 6391.55322265625,
      "eval_runtime": 0.5499,
      "eval_samples_per_second": 14.547,
      "eval_steps_per_second": 1.818,
      "step": 88
    },
    {
      "epoch": 22.5,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 6333.3547,
      "step": 90
    },
    {
      "epoch": 23.0,
      "eval_loss": 6391.34033203125,
      "eval_runtime": 0.5549,
      "eval_samples_per_second": 14.418,
      "eval_steps_per_second": 1.802,
      "step": 92
    },
    {
      "epoch": 24.0,
      "eval_loss": 6391.193359375,
      "eval_runtime": 0.5535,
      "eval_samples_per_second": 14.454,
      "eval_steps_per_second": 1.807,
      "step": 96
    },
    {
      "epoch": 25.0,
      "learning_rate": 1e-05,
      "loss": 6455.1539,
      "step": 100
    },
    {
      "epoch": 25.0,
      "eval_loss": 6390.97412109375,
      "eval_runtime": 0.5532,
      "eval_samples_per_second": 14.46,
      "eval_steps_per_second": 1.808,
      "step": 100
    },
    {
      "epoch": 26.0,
      "eval_loss": 6390.82275390625,
      "eval_runtime": 0.562,
      "eval_samples_per_second": 14.234,
      "eval_steps_per_second": 1.779,
      "step": 104
    },
    {
      "epoch": 27.0,
      "eval_loss": 6390.7607421875,
      "eval_runtime": 0.5645,
      "eval_samples_per_second": 14.171,
      "eval_steps_per_second": 1.771,
      "step": 108
    },
    {
      "epoch": 27.5,
      "learning_rate": 9e-06,
      "loss": 6399.7898,
      "step": 110
    },
    {
      "epoch": 28.0,
      "eval_loss": 6390.66552734375,
      "eval_runtime": 0.558,
      "eval_samples_per_second": 14.338,
      "eval_steps_per_second": 1.792,
      "step": 112
    },
    {
      "epoch": 29.0,
      "eval_loss": 6390.5859375,
      "eval_runtime": 0.5544,
      "eval_samples_per_second": 14.429,
      "eval_steps_per_second": 1.804,
      "step": 116
    },
    {
      "epoch": 30.0,
      "learning_rate": 8.000000000000001e-06,
      "loss": 6410.9336,
      "step": 120
    },
    {
      "epoch": 30.0,
      "eval_loss": 6390.49072265625,
      "eval_runtime": 0.5598,
      "eval_samples_per_second": 14.291,
      "eval_steps_per_second": 1.786,
      "step": 120
    },
    {
      "epoch": 31.0,
      "eval_loss": 6390.3388671875,
      "eval_runtime": 0.562,
      "eval_samples_per_second": 14.236,
      "eval_steps_per_second": 1.779,
      "step": 124
    },
    {
      "epoch": 32.0,
      "eval_loss": 6390.19775390625,
      "eval_runtime": 0.546,
      "eval_samples_per_second": 14.652,
      "eval_steps_per_second": 1.831,
      "step": 128
    },
    {
      "epoch": 32.5,
      "learning_rate": 7e-06,
      "loss": 6409.2,
      "step": 130
    },
    {
      "epoch": 33.0,
      "eval_loss": 6390.0341796875,
      "eval_runtime": 0.5441,
      "eval_samples_per_second": 14.704,
      "eval_steps_per_second": 1.838,
      "step": 132
    },
    {
      "epoch": 34.0,
      "eval_loss": 6389.96240234375,
      "eval_runtime": 0.5472,
      "eval_samples_per_second": 14.619,
      "eval_steps_per_second": 1.827,
      "step": 136
    },
    {
      "epoch": 35.0,
      "learning_rate": 6e-06,
      "loss": 6406.6211,
      "step": 140
    },
    {
      "epoch": 35.0,
      "eval_loss": 6389.9111328125,
      "eval_runtime": 0.5514,
      "eval_samples_per_second": 14.509,
      "eval_steps_per_second": 1.814,
      "step": 140
    },
    {
      "epoch": 36.0,
      "eval_loss": 6389.6875,
      "eval_runtime": 0.5453,
      "eval_samples_per_second": 14.672,
      "eval_steps_per_second": 1.834,
      "step": 144
    },
    {
      "epoch": 37.0,
      "eval_loss": 6389.4755859375,
      "eval_runtime": 0.5552,
      "eval_samples_per_second": 14.409,
      "eval_steps_per_second": 1.801,
      "step": 148
    },
    {
      "epoch": 37.5,
      "learning_rate": 5e-06,
      "loss": 6371.1539,
      "step": 150
    },
    {
      "epoch": 38.0,
      "eval_loss": 6389.3515625,
      "eval_runtime": 0.5423,
      "eval_samples_per_second": 14.751,
      "eval_steps_per_second": 1.844,
      "step": 152
    },
    {
      "epoch": 39.0,
      "eval_loss": 6389.26953125,
      "eval_runtime": 0.5522,
      "eval_samples_per_second": 14.487,
      "eval_steps_per_second": 1.811,
      "step": 156
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6409.1055,
      "step": 160
    },
    {
      "epoch": 40.0,
      "eval_loss": 6389.24951171875,
      "eval_runtime": 0.5572,
      "eval_samples_per_second": 14.356,
      "eval_steps_per_second": 1.795,
      "step": 160
    },
    {
      "epoch": 41.0,
      "eval_loss": 6389.208984375,
      "eval_runtime": 0.5378,
      "eval_samples_per_second": 14.876,
      "eval_steps_per_second": 1.86,
      "step": 164
    },
    {
      "epoch": 42.0,
      "eval_loss": 6389.10986328125,
      "eval_runtime": 0.539,
      "eval_samples_per_second": 14.841,
      "eval_steps_per_second": 1.855,
      "step": 168
    },
    {
      "epoch": 42.5,
      "learning_rate": 3e-06,
      "loss": 6453.5285,
      "step": 170
    },
    {
      "epoch": 43.0,
      "eval_loss": 6389.04052734375,
      "eval_runtime": 0.54,
      "eval_samples_per_second": 14.815,
      "eval_steps_per_second": 1.852,
      "step": 172
    },
    {
      "epoch": 44.0,
      "eval_loss": 6388.99365234375,
      "eval_runtime": 0.5364,
      "eval_samples_per_second": 14.915,
      "eval_steps_per_second": 1.864,
      "step": 176
    },
    {
      "epoch": 45.0,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 6391.1004,
      "step": 180
    },
    {
      "epoch": 45.0,
      "eval_loss": 6388.9541015625,
      "eval_runtime": 0.5495,
      "eval_samples_per_second": 14.558,
      "eval_steps_per_second": 1.82,
      "step": 180
    },
    {
      "epoch": 46.0,
      "eval_loss": 6388.923828125,
      "eval_runtime": 0.5496,
      "eval_samples_per_second": 14.557,
      "eval_steps_per_second": 1.82,
      "step": 184
    },
    {
      "epoch": 47.0,
      "eval_loss": 6388.90771484375,
      "eval_runtime": 0.5379,
      "eval_samples_per_second": 14.871,
      "eval_steps_per_second": 1.859,
      "step": 188
    },
    {
      "epoch": 47.5,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 6416.6641,
      "step": 190
    },
    {
      "epoch": 48.0,
      "eval_loss": 6388.89111328125,
      "eval_runtime": 0.5385,
      "eval_samples_per_second": 14.857,
      "eval_steps_per_second": 1.857,
      "step": 192
    },
    {
      "epoch": 49.0,
      "eval_loss": 6388.8828125,
      "eval_runtime": 0.5466,
      "eval_samples_per_second": 14.635,
      "eval_steps_per_second": 1.829,
      "step": 196
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0,
      "loss": 6397.6828,
      "step": 200
    },
    {
      "epoch": 50.0,
      "eval_loss": 6388.880859375,
      "eval_runtime": 0.5601,
      "eval_samples_per_second": 14.283,
      "eval_steps_per_second": 1.785,
      "step": 200
    },
    {
      "epoch": 50.0,
      "step": 200,
      "total_flos": 7.65002115072e+17,
      "train_loss": 6407.14462890625,
      "train_runtime": 869.7842,
      "train_samples_per_second": 1.84,
      "train_steps_per_second": 0.23
    }
  ],
  "max_steps": 200,
  "num_train_epochs": 50,
  "total_flos": 7.65002115072e+17,
  "trial_name": null,
  "trial_params": null
}