|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2507601166034542, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018807008745259066, |
|
"grad_norm": 0.7771628499031067, |
|
"learning_rate": 1.9877931524601816e-05, |
|
"loss": 10.1462, |
|
"num_input_tokens_seen": 19660800, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03761401749051813, |
|
"grad_norm": 0.841651201248169, |
|
"learning_rate": 1.97525187074119e-05, |
|
"loss": 9.8974, |
|
"num_input_tokens_seen": 39321600, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0564210262357772, |
|
"grad_norm": 0.7740743160247803, |
|
"learning_rate": 1.9627105890221982e-05, |
|
"loss": 9.6653, |
|
"num_input_tokens_seen": 58982400, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07522803498103627, |
|
"grad_norm": 0.8001583218574524, |
|
"learning_rate": 1.9501693073032066e-05, |
|
"loss": 9.4506, |
|
"num_input_tokens_seen": 78643200, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09403504372629533, |
|
"grad_norm": 0.8240243792533875, |
|
"learning_rate": 1.937628025584215e-05, |
|
"loss": 9.2539, |
|
"num_input_tokens_seen": 98304000, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1128420524715544, |
|
"grad_norm": 0.7667157053947449, |
|
"learning_rate": 1.9250867438652233e-05, |
|
"loss": 9.0693, |
|
"num_input_tokens_seen": 117964800, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.13164906121681347, |
|
"grad_norm": 0.8228150010108948, |
|
"learning_rate": 1.9125454621462316e-05, |
|
"loss": 8.8967, |
|
"num_input_tokens_seen": 137625600, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.15045606996207253, |
|
"grad_norm": 0.8424794673919678, |
|
"learning_rate": 1.9000041804272396e-05, |
|
"loss": 8.7364, |
|
"num_input_tokens_seen": 157286400, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1692630787073316, |
|
"grad_norm": 0.7859320640563965, |
|
"learning_rate": 1.8874628987082483e-05, |
|
"loss": 8.5903, |
|
"num_input_tokens_seen": 176947200, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.18807008745259066, |
|
"grad_norm": 0.7881995439529419, |
|
"learning_rate": 1.8749216169892567e-05, |
|
"loss": 8.4497, |
|
"num_input_tokens_seen": 196608000, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.20687709619784972, |
|
"grad_norm": 0.8333655595779419, |
|
"learning_rate": 1.8623803352702647e-05, |
|
"loss": 8.3184, |
|
"num_input_tokens_seen": 216268800, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2256841049431088, |
|
"grad_norm": 0.7956089973449707, |
|
"learning_rate": 1.849839053551273e-05, |
|
"loss": 8.1998, |
|
"num_input_tokens_seen": 235929600, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.24449111368836787, |
|
"grad_norm": 0.8068310022354126, |
|
"learning_rate": 1.8372977718322814e-05, |
|
"loss": 8.0871, |
|
"num_input_tokens_seen": 255590400, |
|
"step": 1950 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 23925, |
|
"num_input_tokens_seen": 262144000, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4047671132160000.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|