|
{ |
|
"best_metric": 10.823450088500977, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 3.1772151898734178, |
|
"eval_steps": 25, |
|
"global_step": 45, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06751054852320675, |
|
"grad_norm": 10.661303520202637, |
|
"learning_rate": 5e-05, |
|
"loss": 177.6179, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06751054852320675, |
|
"eval_loss": 11.089486122131348, |
|
"eval_runtime": 0.4073, |
|
"eval_samples_per_second": 122.749, |
|
"eval_steps_per_second": 31.915, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1350210970464135, |
|
"grad_norm": 11.559457778930664, |
|
"learning_rate": 0.0001, |
|
"loss": 177.5009, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.20253164556962025, |
|
"grad_norm": 12.108957290649414, |
|
"learning_rate": 9.987995276485983e-05, |
|
"loss": 177.3537, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.270042194092827, |
|
"grad_norm": 11.658157348632812, |
|
"learning_rate": 9.952045156337997e-05, |
|
"loss": 177.1584, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.33755274261603374, |
|
"grad_norm": 11.22645092010498, |
|
"learning_rate": 9.892341449001673e-05, |
|
"loss": 177.0683, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.4050632911392405, |
|
"grad_norm": 12.057780265808105, |
|
"learning_rate": 9.809202699587827e-05, |
|
"loss": 176.7483, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.47257383966244726, |
|
"grad_norm": 13.107226371765137, |
|
"learning_rate": 9.703072489296466e-05, |
|
"loss": 176.3552, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.540084388185654, |
|
"grad_norm": 11.353434562683105, |
|
"learning_rate": 9.574517068719161e-05, |
|
"loss": 176.6248, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.6075949367088608, |
|
"grad_norm": 11.873199462890625, |
|
"learning_rate": 9.424222336647135e-05, |
|
"loss": 176.2631, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.6751054852320675, |
|
"grad_norm": 12.448023796081543, |
|
"learning_rate": 9.252990180504451e-05, |
|
"loss": 175.8736, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7426160337552743, |
|
"grad_norm": 11.880512237548828, |
|
"learning_rate": 9.061734197931644e-05, |
|
"loss": 175.8122, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.810126582278481, |
|
"grad_norm": 11.65035343170166, |
|
"learning_rate": 8.851474822347019e-05, |
|
"loss": 175.7791, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.8776371308016878, |
|
"grad_norm": 12.511054992675781, |
|
"learning_rate": 8.623333878492854e-05, |
|
"loss": 175.3126, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.9451476793248945, |
|
"grad_norm": 13.46619987487793, |
|
"learning_rate": 8.37852859701501e-05, |
|
"loss": 174.8253, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.0590717299578059, |
|
"grad_norm": 11.522778511047363, |
|
"learning_rate": 8.118365120010789e-05, |
|
"loss": 175.4029, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.1265822784810127, |
|
"grad_norm": 12.011845588684082, |
|
"learning_rate": 7.844231532195686e-05, |
|
"loss": 174.935, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.1940928270042195, |
|
"grad_norm": 12.92267894744873, |
|
"learning_rate": 7.557590454870874e-05, |
|
"loss": 174.4375, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.261603375527426, |
|
"grad_norm": 12.653983116149902, |
|
"learning_rate": 7.259971242205702e-05, |
|
"loss": 174.3431, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.3291139240506329, |
|
"grad_norm": 12.170445442199707, |
|
"learning_rate": 6.952961821471509e-05, |
|
"loss": 174.4823, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.3966244725738397, |
|
"grad_norm": 12.820944786071777, |
|
"learning_rate": 6.638200220762563e-05, |
|
"loss": 173.9825, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.4641350210970465, |
|
"grad_norm": 13.479021072387695, |
|
"learning_rate": 6.317365829407465e-05, |
|
"loss": 173.4447, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.5316455696202531, |
|
"grad_norm": 11.850541114807129, |
|
"learning_rate": 5.992170437700436e-05, |
|
"loss": 174.0724, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.59915611814346, |
|
"grad_norm": 12.216917037963867, |
|
"learning_rate": 5.6643491037594666e-05, |
|
"loss": 173.7542, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 12.633533477783203, |
|
"learning_rate": 5.335650896240535e-05, |
|
"loss": 173.3273, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.7341772151898733, |
|
"grad_norm": 12.504616737365723, |
|
"learning_rate": 5.0078295622995664e-05, |
|
"loss": 173.2192, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.7341772151898733, |
|
"eval_loss": 10.823450088500977, |
|
"eval_runtime": 0.1051, |
|
"eval_samples_per_second": 475.75, |
|
"eval_steps_per_second": 123.695, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.8016877637130801, |
|
"grad_norm": 12.02143383026123, |
|
"learning_rate": 4.682634170592537e-05, |
|
"loss": 173.4692, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.869198312236287, |
|
"grad_norm": 12.537296295166016, |
|
"learning_rate": 4.3617997792374365e-05, |
|
"loss": 172.9671, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.9367088607594938, |
|
"grad_norm": 12.867912292480469, |
|
"learning_rate": 4.0470381785284936e-05, |
|
"loss": 172.5768, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.050632911392405, |
|
"grad_norm": 11.798124313354492, |
|
"learning_rate": 3.7400287577942993e-05, |
|
"loss": 173.2267, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.1181434599156117, |
|
"grad_norm": 11.980865478515625, |
|
"learning_rate": 3.4424095451291274e-05, |
|
"loss": 172.9524, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.1856540084388185, |
|
"grad_norm": 12.464712142944336, |
|
"learning_rate": 3.155768467804314e-05, |
|
"loss": 172.4342, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.2531645569620253, |
|
"grad_norm": 12.398478507995605, |
|
"learning_rate": 2.8816348799892133e-05, |
|
"loss": 172.3243, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.320675105485232, |
|
"grad_norm": 11.792886734008789, |
|
"learning_rate": 2.621471402984991e-05, |
|
"loss": 172.8272, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.388185654008439, |
|
"grad_norm": 11.965614318847656, |
|
"learning_rate": 2.3766661215071475e-05, |
|
"loss": 172.4491, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.4556962025316453, |
|
"grad_norm": 12.57142162322998, |
|
"learning_rate": 2.148525177652982e-05, |
|
"loss": 171.9388, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.523206751054852, |
|
"grad_norm": 11.402908325195312, |
|
"learning_rate": 1.938265802068357e-05, |
|
"loss": 172.6287, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.590717299578059, |
|
"grad_norm": 11.887864112854004, |
|
"learning_rate": 1.74700981949555e-05, |
|
"loss": 172.3424, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.6582278481012658, |
|
"grad_norm": 12.034871101379395, |
|
"learning_rate": 1.5757776633528655e-05, |
|
"loss": 172.0158, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.7257383966244726, |
|
"grad_norm": 12.191680908203125, |
|
"learning_rate": 1.4254829312808404e-05, |
|
"loss": 171.8472, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.7932489451476794, |
|
"grad_norm": 11.363740921020508, |
|
"learning_rate": 1.2969275107035345e-05, |
|
"loss": 172.5804, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.8607594936708862, |
|
"grad_norm": 11.75307559967041, |
|
"learning_rate": 1.1907973004121738e-05, |
|
"loss": 172.0639, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.928270042194093, |
|
"grad_norm": 12.174725532531738, |
|
"learning_rate": 1.1076585509983283e-05, |
|
"loss": 171.687, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 3.042194092827004, |
|
"grad_norm": 11.464780807495117, |
|
"learning_rate": 1.0479548436620041e-05, |
|
"loss": 172.3345, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.109704641350211, |
|
"grad_norm": 11.445940017700195, |
|
"learning_rate": 1.0120047235140178e-05, |
|
"loss": 172.2803, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.1772151898734178, |
|
"grad_norm": 11.784713745117188, |
|
"learning_rate": 1e-05, |
|
"loss": 171.8565, |
|
"step": 45 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 45, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 34300011479040.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|