{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 15.0,
    "global_step": 495,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.18,
            "learning_rate": 2.4e-05,
            "loss": 3.6345,
            "step": 6
        },
        {
            "epoch": 0.36,
            "learning_rate": 4.8e-05,
            "loss": 3.6023,
            "step": 12
        },
        {
            "epoch": 0.55,
            "learning_rate": 7.2e-05,
            "loss": 3.5522,
            "step": 18
        },
        {
            "epoch": 0.73,
            "learning_rate": 9.6e-05,
            "loss": 3.558,
            "step": 24
        },
        {
            "epoch": 0.91,
            "learning_rate": 0.00012,
            "loss": 3.5927,
            "step": 30
        },
        {
            "epoch": 1.09,
            "learning_rate": 0.000144,
            "loss": 3.4643,
            "step": 36
        },
        {
            "epoch": 1.27,
            "learning_rate": 0.000168,
            "loss": 3.469,
            "step": 42
        },
        {
            "epoch": 1.45,
            "learning_rate": 0.000192,
            "loss": 3.5263,
            "step": 48
        },
        {
            "epoch": 1.64,
            "learning_rate": 0.00019820224719101123,
            "loss": 3.4205,
            "step": 54
        },
        {
            "epoch": 1.82,
            "learning_rate": 0.0001955056179775281,
            "loss": 3.3894,
            "step": 60
        },
        {
            "epoch": 2.0,
            "learning_rate": 0.00019280898876404497,
            "loss": 3.4589,
            "step": 66
        },
        {
            "epoch": 2.18,
            "learning_rate": 0.0001901123595505618,
            "loss": 3.312,
            "step": 72
        },
        {
            "epoch": 2.36,
            "learning_rate": 0.00018741573033707868,
            "loss": 3.4469,
            "step": 78
        },
        {
            "epoch": 2.55,
            "learning_rate": 0.00018471910112359552,
            "loss": 3.3239,
            "step": 84
        },
        {
            "epoch": 2.73,
            "learning_rate": 0.00018202247191011236,
            "loss": 3.4202,
            "step": 90
        },
        {
            "epoch": 2.91,
            "learning_rate": 0.00017932584269662923,
            "loss": 3.368,
            "step": 96
        },
        {
            "epoch": 3.09,
            "learning_rate": 0.00017662921348314607,
            "loss": 3.2992,
            "step": 102
        },
        {
            "epoch": 3.27,
            "learning_rate": 0.0001739325842696629,
            "loss": 3.2699,
            "step": 108
        },
        {
            "epoch": 3.45,
            "learning_rate": 0.00017123595505617978,
            "loss": 3.3384,
            "step": 114
        },
        {
            "epoch": 3.64,
            "learning_rate": 0.00016853932584269662,
            "loss": 3.3604,
            "step": 120
        },
        {
            "epoch": 3.82,
            "learning_rate": 0.0001658426966292135,
            "loss": 3.2875,
            "step": 126
        },
        {
            "epoch": 4.0,
            "learning_rate": 0.00016314606741573036,
            "loss": 3.3874,
            "step": 132
        },
        {
            "epoch": 4.18,
            "learning_rate": 0.0001604494382022472,
            "loss": 3.1982,
            "step": 138
        },
        {
            "epoch": 4.36,
            "learning_rate": 0.00015775280898876404,
            "loss": 3.323,
            "step": 144
        },
        {
            "epoch": 4.55,
            "learning_rate": 0.0001550561797752809,
            "loss": 3.3645,
            "step": 150
        },
        {
            "epoch": 4.73,
            "learning_rate": 0.00015235955056179775,
            "loss": 3.3209,
            "step": 156
        },
        {
            "epoch": 4.91,
            "learning_rate": 0.00014966292134831462,
            "loss": 3.28,
            "step": 162
        },
        {
            "epoch": 5.09,
            "learning_rate": 0.00014696629213483146,
            "loss": 3.4203,
            "step": 168
        },
        {
            "epoch": 5.27,
            "learning_rate": 0.0001442696629213483,
            "loss": 3.1531,
            "step": 174
        },
        {
            "epoch": 5.45,
            "learning_rate": 0.00014157303370786517,
            "loss": 3.1857,
            "step": 180
        },
        {
            "epoch": 5.64,
            "learning_rate": 0.00013887640449438204,
            "loss": 3.2815,
            "step": 186
        },
        {
            "epoch": 5.82,
            "learning_rate": 0.00013617977528089889,
            "loss": 3.2879,
            "step": 192
        },
        {
            "epoch": 6.0,
            "learning_rate": 0.00013348314606741575,
            "loss": 3.4061,
            "step": 198
        },
        {
            "epoch": 6.18,
            "learning_rate": 0.0001307865168539326,
            "loss": 3.3026,
            "step": 204
        },
        {
            "epoch": 6.36,
            "learning_rate": 0.00012808988764044944,
            "loss": 3.3174,
            "step": 210
        },
        {
            "epoch": 6.55,
            "learning_rate": 0.0001253932584269663,
            "loss": 3.297,
            "step": 216
        },
        {
            "epoch": 6.73,
            "learning_rate": 0.00012269662921348315,
            "loss": 3.2406,
            "step": 222
        },
        {
            "epoch": 6.91,
            "learning_rate": 0.00012,
            "loss": 3.1832,
            "step": 228
        },
        {
            "epoch": 7.09,
            "learning_rate": 0.00011730337078651686,
            "loss": 3.2514,
            "step": 234
        },
        {
            "epoch": 7.27,
            "learning_rate": 0.0001146067415730337,
            "loss": 3.2083,
            "step": 240
        },
        {
            "epoch": 7.45,
            "learning_rate": 0.00011191011235955056,
            "loss": 3.1881,
            "step": 246
        },
        {
            "epoch": 7.64,
            "learning_rate": 0.00010921348314606742,
            "loss": 3.1987,
            "step": 252
        },
        {
            "epoch": 7.82,
            "learning_rate": 0.00010651685393258428,
            "loss": 3.2388,
            "step": 258
        },
        {
            "epoch": 8.0,
            "learning_rate": 0.00010382022471910113,
            "loss": 3.3154,
            "step": 264
        },
        {
            "epoch": 8.18,
            "learning_rate": 0.00010112359550561799,
            "loss": 3.2307,
            "step": 270
        },
        {
            "epoch": 8.36,
            "learning_rate": 9.842696629213483e-05,
            "loss": 3.2064,
            "step": 276
        },
        {
            "epoch": 8.55,
            "learning_rate": 9.573033707865169e-05,
            "loss": 3.2081,
            "step": 282
        },
        {
            "epoch": 8.73,
            "learning_rate": 9.303370786516854e-05,
            "loss": 3.2904,
            "step": 288
        },
        {
            "epoch": 8.91,
            "learning_rate": 9.03370786516854e-05,
            "loss": 3.2104,
            "step": 294
        },
        {
            "epoch": 9.09,
            "learning_rate": 8.764044943820225e-05,
            "loss": 3.3033,
            "step": 300
        },
        {
            "epoch": 9.27,
            "learning_rate": 8.494382022471911e-05,
            "loss": 3.2296,
            "step": 306
        },
        {
            "epoch": 9.45,
            "learning_rate": 8.224719101123596e-05,
            "loss": 3.2077,
            "step": 312
        },
        {
            "epoch": 9.64,
            "learning_rate": 7.95505617977528e-05,
            "loss": 3.2331,
            "step": 318
        },
        {
            "epoch": 9.82,
            "learning_rate": 7.685393258426966e-05,
            "loss": 3.2844,
            "step": 324
        },
        {
            "epoch": 10.0,
            "learning_rate": 7.415730337078653e-05,
            "loss": 3.0974,
            "step": 330
        },
        {
            "epoch": 10.18,
            "learning_rate": 7.146067415730337e-05,
            "loss": 3.2892,
            "step": 336
        },
        {
            "epoch": 10.36,
            "learning_rate": 6.876404494382023e-05,
            "loss": 3.1417,
            "step": 342
        },
        {
            "epoch": 10.55,
            "learning_rate": 6.606741573033708e-05,
            "loss": 3.2408,
            "step": 348
        },
        {
            "epoch": 10.73,
            "learning_rate": 6.337078651685394e-05,
            "loss": 3.3139,
            "step": 354
        },
        {
            "epoch": 10.91,
            "learning_rate": 6.067415730337079e-05,
            "loss": 3.1222,
            "step": 360
        },
        {
            "epoch": 11.09,
            "learning_rate": 5.7977528089887646e-05,
            "loss": 3.2575,
            "step": 366
        },
        {
            "epoch": 11.27,
            "learning_rate": 5.5280898876404495e-05,
            "loss": 3.356,
            "step": 372
        },
        {
            "epoch": 11.45,
            "learning_rate": 5.258426966292135e-05,
            "loss": 3.129,
            "step": 378
        },
        {
            "epoch": 11.64,
            "learning_rate": 4.9887640449438205e-05,
            "loss": 3.1446,
            "step": 384
        },
        {
            "epoch": 11.82,
            "learning_rate": 4.719101123595506e-05,
            "loss": 3.1887,
            "step": 390
        },
        {
            "epoch": 12.0,
            "learning_rate": 4.4494382022471916e-05,
            "loss": 3.1785,
            "step": 396
        },
        {
            "epoch": 12.18,
            "learning_rate": 4.1797752808988764e-05,
            "loss": 3.278,
            "step": 402
        },
        {
            "epoch": 12.36,
            "learning_rate": 3.910112359550562e-05,
            "loss": 3.3521,
            "step": 408
        },
        {
            "epoch": 12.55,
            "learning_rate": 3.6404494382022475e-05,
            "loss": 3.1367,
            "step": 414
        },
        {
            "epoch": 12.73,
            "learning_rate": 3.370786516853933e-05,
            "loss": 3.1938,
            "step": 420
        },
        {
            "epoch": 12.91,
            "learning_rate": 3.1011235955056185e-05,
            "loss": 3.0515,
            "step": 426
        },
        {
            "epoch": 13.09,
            "learning_rate": 2.8314606741573037e-05,
            "loss": 3.187,
            "step": 432
        },
        {
            "epoch": 13.27,
            "learning_rate": 2.5617977528089885e-05,
            "loss": 3.2362,
            "step": 438
        },
        {
            "epoch": 13.45,
            "learning_rate": 2.292134831460674e-05,
            "loss": 3.1142,
            "step": 444
        },
        {
            "epoch": 13.64,
            "learning_rate": 2.0224719101123596e-05,
            "loss": 3.2631,
            "step": 450
        },
        {
            "epoch": 13.82,
            "learning_rate": 1.752808988764045e-05,
            "loss": 3.2107,
            "step": 456
        },
        {
            "epoch": 14.0,
            "learning_rate": 1.4831460674157305e-05,
            "loss": 3.1778,
            "step": 462
        },
        {
            "epoch": 14.18,
            "learning_rate": 1.2134831460674158e-05,
            "loss": 3.1921,
            "step": 468
        },
        {
            "epoch": 14.36,
            "learning_rate": 9.438202247191012e-06,
            "loss": 3.2432,
            "step": 474
        },
        {
            "epoch": 14.55,
            "learning_rate": 6.741573033707865e-06,
            "loss": 3.2269,
            "step": 480
        },
        {
            "epoch": 14.73,
            "learning_rate": 4.044943820224719e-06,
            "loss": 3.2172,
            "step": 486
        },
        {
            "epoch": 14.91,
            "learning_rate": 1.3483146067415732e-06,
            "loss": 3.1409,
            "step": 492
        }
    ],
    "max_steps": 495,
    "num_train_epochs": 15,
    "total_flos": 517312924876800.0,
    "trial_name": null,
    "trial_params": null
}