{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 59.927739863508634,
  "eval_steps": 1866,
  "global_step": 18660,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.992773986350863,
      "grad_norm": 7.660736560821533,
      "learning_rate": 9.001071811361202e-06,
      "loss": 2.077,
      "step": 1866
    },
    {
      "epoch": 5.992773986350863,
      "eval_accuracy": 0.07665166078598369,
      "eval_loss": 3.0592968463897705,
      "eval_runtime": 32.9334,
      "eval_samples_per_second": 249.564,
      "eval_steps_per_second": 12.48,
      "step": 1866
    },
    {
      "epoch": 11.985547972701726,
      "grad_norm": 8.02175521850586,
      "learning_rate": 8.0021436227224e-06,
      "loss": 1.8747,
      "step": 3732
    },
    {
      "epoch": 11.985547972701726,
      "eval_accuracy": 0.07878087358559435,
      "eval_loss": 3.1968600749969482,
      "eval_runtime": 33.6195,
      "eval_samples_per_second": 244.471,
      "eval_steps_per_second": 12.225,
      "step": 3732
    },
    {
      "epoch": 17.97832195905259,
      "grad_norm": 7.991235733032227,
      "learning_rate": 7.002143622722402e-06,
      "loss": 1.7613,
      "step": 5598
    },
    {
      "epoch": 17.97832195905259,
      "eval_accuracy": 0.07819280528855903,
      "eval_loss": 3.2275424003601074,
      "eval_runtime": 33.0021,
      "eval_samples_per_second": 249.045,
      "eval_steps_per_second": 12.454,
      "step": 5598
    },
    {
      "epoch": 23.97109594540345,
      "grad_norm": 4.696638584136963,
      "learning_rate": 6.002679528403002e-06,
      "loss": 1.703,
      "step": 7464
    },
    {
      "epoch": 23.97109594540345,
      "eval_accuracy": 0.07881129091130308,
      "eval_loss": 3.367746114730835,
      "eval_runtime": 33.0083,
      "eval_samples_per_second": 248.998,
      "eval_steps_per_second": 12.451,
      "step": 7464
    },
    {
      "epoch": 29.963869931754317,
      "grad_norm": 2.74943208694458,
      "learning_rate": 5.0037513397642025e-06,
      "loss": 1.676,
      "step": 9330
    },
    {
      "epoch": 29.963869931754317,
      "eval_accuracy": 0.07835503102567222,
      "eval_loss": 3.4368343353271484,
      "eval_runtime": 32.8486,
      "eval_samples_per_second": 250.208,
      "eval_steps_per_second": 12.512,
      "step": 9330
    },
    {
      "epoch": 35.95664391810518,
      "grad_norm": 4.24375057220459,
      "learning_rate": 4.003751339764202e-06,
      "loss": 1.6495,
      "step": 11196
    },
    {
      "epoch": 35.95664391810518,
      "eval_accuracy": 0.07829419637425478,
      "eval_loss": 3.5519556999206543,
      "eval_runtime": 32.9918,
      "eval_samples_per_second": 249.123,
      "eval_steps_per_second": 12.458,
      "step": 11196
    },
    {
      "epoch": 41.94941790445604,
      "grad_norm": 3.4726574420928955,
      "learning_rate": 3.004287245444802e-06,
      "loss": 1.6449,
      "step": 13062
    },
    {
      "epoch": 41.94941790445604,
      "eval_accuracy": 0.07814645507795526,
      "eval_loss": 3.556215763092041,
      "eval_runtime": 32.6442,
      "eval_samples_per_second": 251.776,
      "eval_steps_per_second": 12.59,
      "step": 13062
    },
    {
      "epoch": 47.9421918908069,
      "grad_norm": 3.2187790870666504,
      "learning_rate": 2.004287245444802e-06,
      "loss": 1.6293,
      "step": 14928
    },
    {
      "epoch": 47.9421918908069,
      "eval_accuracy": 0.07750334590582796,
      "eval_loss": 3.621753215789795,
      "eval_runtime": 32.8748,
      "eval_samples_per_second": 250.009,
      "eval_steps_per_second": 12.502,
      "step": 14928
    },
    {
      "epoch": 53.934965877157765,
      "grad_norm": 1.5753388404846191,
      "learning_rate": 1.0048231511254019e-06,
      "loss": 1.6301,
      "step": 16794
    },
    {
      "epoch": 53.934965877157765,
      "eval_accuracy": 0.07704370631734057,
      "eval_loss": 3.7435097694396973,
      "eval_runtime": 32.6254,
      "eval_samples_per_second": 251.92,
      "eval_steps_per_second": 12.598,
      "step": 16794
    },
    {
      "epoch": 59.927739863508634,
      "grad_norm": 1.8606178760528564,
      "learning_rate": 5.359056806002144e-09,
      "loss": 1.6232,
      "step": 18660
    },
    {
      "epoch": 59.927739863508634,
      "eval_accuracy": 0.07645698990144786,
      "eval_loss": 3.775934934616089,
      "eval_runtime": 32.9764,
      "eval_samples_per_second": 249.239,
      "eval_steps_per_second": 12.463,
      "step": 18660
    },
    {
      "epoch": 59.927739863508634,
      "step": 18660,
      "total_flos": 1.4754079994623488e+18,
      "train_loss": 1.726910066757938,
      "train_runtime": 33547.8653,
      "train_samples_per_second": 111.342,
      "train_steps_per_second": 0.556
    }
  ],
  "logging_steps": 1866,
  "max_steps": 18660,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4754079994623488e+18,
  "train_batch_size": 25,
  "trial_name": null,
  "trial_params": null
}