|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 3000, |
|
"global_step": 556, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019640287769784174, |
|
"loss": 2.1468, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019280575539568347, |
|
"loss": 1.9709, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018920863309352518, |
|
"loss": 1.8468, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001856115107913669, |
|
"loss": 1.6942, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018201438848920864, |
|
"loss": 1.4989, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017841726618705037, |
|
"loss": 1.3514, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001748201438848921, |
|
"loss": 1.3165, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017122302158273383, |
|
"loss": 1.2268, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00016762589928057554, |
|
"loss": 1.3164, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016402877697841727, |
|
"loss": 1.3179, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000160431654676259, |
|
"loss": 1.2648, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015683453237410073, |
|
"loss": 1.178, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015323741007194246, |
|
"loss": 1.1558, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001496402877697842, |
|
"loss": 1.0114, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001460431654676259, |
|
"loss": 0.8844, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014244604316546763, |
|
"loss": 0.9118, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013884892086330936, |
|
"loss": 1.0269, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001352517985611511, |
|
"loss": 0.9542, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00013165467625899283, |
|
"loss": 0.8281, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012805755395683453, |
|
"loss": 0.8024, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012446043165467626, |
|
"loss": 0.8185, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012086330935251799, |
|
"loss": 1.099, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011726618705035972, |
|
"loss": 0.8726, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011366906474820144, |
|
"loss": 0.7907, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011007194244604317, |
|
"loss": 0.9099, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001064748201438849, |
|
"loss": 0.7301, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010287769784172662, |
|
"loss": 0.772, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.928057553956835e-05, |
|
"loss": 0.7936, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.568345323741009e-05, |
|
"loss": 0.6623, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.20863309352518e-05, |
|
"loss": 0.5091, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.848920863309353e-05, |
|
"loss": 0.4996, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.489208633093527e-05, |
|
"loss": 0.5529, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.129496402877698e-05, |
|
"loss": 0.6094, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.769784172661872e-05, |
|
"loss": 0.4422, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.410071942446043e-05, |
|
"loss": 0.4468, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.050359712230215e-05, |
|
"loss": 0.4673, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.690647482014388e-05, |
|
"loss": 0.4746, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.366906474820145e-05, |
|
"loss": 0.4601, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.007194244604317e-05, |
|
"loss": 0.4793, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.64748201438849e-05, |
|
"loss": 0.5919, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.287769784172663e-05, |
|
"loss": 0.4313, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.9280575539568345e-05, |
|
"loss": 0.4715, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.5683453237410076e-05, |
|
"loss": 0.3263, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.20863309352518e-05, |
|
"loss": 0.4526, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.8489208633093525e-05, |
|
"loss": 0.4379, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.489208633093525e-05, |
|
"loss": 0.5083, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.129496402877698e-05, |
|
"loss": 0.5373, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7697841726618706e-05, |
|
"loss": 0.3303, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4100719424460434e-05, |
|
"loss": 0.3214, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.050359712230216e-05, |
|
"loss": 0.4343, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6906474820143887e-05, |
|
"loss": 0.314, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3309352517985613e-05, |
|
"loss": 0.3727, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.71223021582734e-06, |
|
"loss": 0.2798, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.115107913669065e-06, |
|
"loss": 0.4031, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.5179856115107916e-06, |
|
"loss": 0.3542, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 556, |
|
"total_flos": 6.887981879958897e+17, |
|
"train_loss": 0.8073481788738168, |
|
"train_runtime": 211.1785, |
|
"train_samples_per_second": 42.088, |
|
"train_steps_per_second": 2.633 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 556, |
|
"num_train_epochs": 2, |
|
"save_steps": 3000, |
|
"total_flos": 6.887981879958897e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|