{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.00663858036512192,
  "eval_steps": 13,
  "global_step": 26,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00025533001404315077,
      "grad_norm": 0.6356120705604553,
      "learning_rate": 5e-06,
      "loss": 44.3634,
      "step": 1
    },
    {
      "epoch": 0.00025533001404315077,
      "eval_loss": 11.092207908630371,
      "eval_runtime": 4.4694,
      "eval_samples_per_second": 368.951,
      "eval_steps_per_second": 184.587,
      "step": 1
    },
    {
      "epoch": 0.0005106600280863015,
      "grad_norm": 0.5739763975143433,
      "learning_rate": 1e-05,
      "loss": 44.3397,
      "step": 2
    },
    {
      "epoch": 0.0007659900421294523,
      "grad_norm": 0.5774300694465637,
      "learning_rate": 1.5e-05,
      "loss": 44.3605,
      "step": 3
    },
    {
      "epoch": 0.001021320056172603,
      "grad_norm": 0.7528806924819946,
      "learning_rate": 2e-05,
      "loss": 44.3484,
      "step": 4
    },
    {
      "epoch": 0.0012766500702157538,
      "grad_norm": 0.5565564036369324,
      "learning_rate": 2.5e-05,
      "loss": 44.3484,
      "step": 5
    },
    {
      "epoch": 0.0015319800842589046,
      "grad_norm": 0.5334393382072449,
      "learning_rate": 3e-05,
      "loss": 44.3429,
      "step": 6
    },
    {
      "epoch": 0.0017873100983020554,
      "grad_norm": 0.5663673281669617,
      "learning_rate": 3.5e-05,
      "loss": 44.3466,
      "step": 7
    },
    {
      "epoch": 0.002042640112345206,
      "grad_norm": 0.5675923824310303,
      "learning_rate": 4e-05,
      "loss": 44.3318,
      "step": 8
    },
    {
      "epoch": 0.002297970126388357,
      "grad_norm": 0.5051159262657166,
      "learning_rate": 4.5e-05,
      "loss": 44.3295,
      "step": 9
    },
    {
      "epoch": 0.0025533001404315077,
      "grad_norm": 0.5161178112030029,
      "learning_rate": 5e-05,
      "loss": 44.3284,
      "step": 10
    },
    {
      "epoch": 0.0028086301544746587,
      "grad_norm": 0.4618953764438629,
      "learning_rate": 4.99229333433282e-05,
      "loss": 44.3585,
      "step": 11
    },
    {
      "epoch": 0.003063960168517809,
      "grad_norm": 0.6032963991165161,
      "learning_rate": 4.9692208514878444e-05,
      "loss": 44.3768,
      "step": 12
    },
    {
      "epoch": 0.00331929018256096,
      "grad_norm": 0.5403372645378113,
      "learning_rate": 4.9309248009941914e-05,
      "loss": 44.3579,
      "step": 13
    },
    {
      "epoch": 0.00331929018256096,
      "eval_loss": 11.091602325439453,
      "eval_runtime": 3.5459,
      "eval_samples_per_second": 465.044,
      "eval_steps_per_second": 232.663,
      "step": 13
    },
    {
      "epoch": 0.0035746201966041107,
      "grad_norm": 0.593859076499939,
      "learning_rate": 4.877641290737884e-05,
      "loss": 44.3825,
      "step": 14
    },
    {
      "epoch": 0.0038299502106472617,
      "grad_norm": 0.6427613496780396,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 44.3755,
      "step": 15
    },
    {
      "epoch": 0.004085280224690412,
      "grad_norm": 0.6255062222480774,
      "learning_rate": 4.72751631047092e-05,
      "loss": 44.369,
      "step": 16
    },
    {
      "epoch": 0.004340610238733563,
      "grad_norm": 0.627750039100647,
      "learning_rate": 4.6316004108852305e-05,
      "loss": 44.3665,
      "step": 17
    },
    {
      "epoch": 0.004595940252776714,
      "grad_norm": 0.5742212533950806,
      "learning_rate": 4.522542485937369e-05,
      "loss": 44.3836,
      "step": 18
    },
    {
      "epoch": 0.004851270266819864,
      "grad_norm": 0.6519846320152283,
      "learning_rate": 4.401014914000078e-05,
      "loss": 44.3914,
      "step": 19
    },
    {
      "epoch": 0.005106600280863015,
      "grad_norm": 0.5559941530227661,
      "learning_rate": 4.267766952966369e-05,
      "loss": 44.3478,
      "step": 20
    },
    {
      "epoch": 0.005361930294906166,
      "grad_norm": 0.576760470867157,
      "learning_rate": 4.123620120825459e-05,
      "loss": 44.3521,
      "step": 21
    },
    {
      "epoch": 0.005617260308949317,
      "grad_norm": 0.8383557200431824,
      "learning_rate": 3.969463130731183e-05,
      "loss": 44.3474,
      "step": 22
    },
    {
      "epoch": 0.005872590322992467,
      "grad_norm": 0.5783092379570007,
      "learning_rate": 3.8062464117898724e-05,
      "loss": 44.3559,
      "step": 23
    },
    {
      "epoch": 0.006127920337035618,
      "grad_norm": 0.5727637410163879,
      "learning_rate": 3.634976249348867e-05,
      "loss": 44.4125,
      "step": 24
    },
    {
      "epoch": 0.006383250351078769,
      "grad_norm": 0.610941469669342,
      "learning_rate": 3.456708580912725e-05,
      "loss": 44.3396,
      "step": 25
    },
    {
      "epoch": 0.00663858036512192,
      "grad_norm": 0.734337329864502,
      "learning_rate": 3.272542485937369e-05,
      "loss": 44.3969,
      "step": 26
    },
    {
      "epoch": 0.00663858036512192,
      "eval_loss": 11.090802192687988,
      "eval_runtime": 3.6364,
      "eval_samples_per_second": 453.472,
      "eval_steps_per_second": 226.873,
      "step": 26
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 19424870400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}