|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 3180, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9968553459119497, |
|
"grad_norm": 0.6745762825012207, |
|
"learning_rate": 2.7360787331269834e-05, |
|
"loss": 0.357, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7412903225806452, |
|
"eval_loss": 0.14044521749019623, |
|
"eval_runtime": 5.4763, |
|
"eval_samples_per_second": 566.072, |
|
"eval_steps_per_second": 11.869, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.9937106918238994, |
|
"grad_norm": 0.4204491376876831, |
|
"learning_rate": 2.433131838819874e-05, |
|
"loss": 0.1207, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8770967741935484, |
|
"eval_loss": 0.06287968158721924, |
|
"eval_runtime": 5.5283, |
|
"eval_samples_per_second": 560.754, |
|
"eval_steps_per_second": 11.758, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.990566037735849, |
|
"grad_norm": 0.4077504873275757, |
|
"learning_rate": 2.130184944512765e-05, |
|
"loss": 0.0718, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9116129032258065, |
|
"eval_loss": 0.04054585471749306, |
|
"eval_runtime": 5.4053, |
|
"eval_samples_per_second": 573.506, |
|
"eval_steps_per_second": 12.025, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 3.9874213836477987, |
|
"grad_norm": 0.2433169037103653, |
|
"learning_rate": 1.8272380502056557e-05, |
|
"loss": 0.0534, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9274193548387096, |
|
"eval_loss": 0.03159501776099205, |
|
"eval_runtime": 5.4668, |
|
"eval_samples_per_second": 567.056, |
|
"eval_steps_per_second": 11.89, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 4.984276729559748, |
|
"grad_norm": 0.21281147003173828, |
|
"learning_rate": 1.5242911558985466e-05, |
|
"loss": 0.0442, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9309677419354838, |
|
"eval_loss": 0.02665688283741474, |
|
"eval_runtime": 5.5204, |
|
"eval_samples_per_second": 561.552, |
|
"eval_steps_per_second": 11.774, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.981132075471698, |
|
"grad_norm": 0.23512092232704163, |
|
"learning_rate": 1.2213442615914374e-05, |
|
"loss": 0.0389, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9329032258064516, |
|
"eval_loss": 0.024243181571364403, |
|
"eval_runtime": 5.4419, |
|
"eval_samples_per_second": 569.649, |
|
"eval_steps_per_second": 11.944, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 6.977987421383648, |
|
"grad_norm": 0.2756204605102539, |
|
"learning_rate": 9.18397367284328e-06, |
|
"loss": 0.0356, |
|
"step": 2219 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9361290322580645, |
|
"eval_loss": 0.02270686812698841, |
|
"eval_runtime": 5.4205, |
|
"eval_samples_per_second": 571.9, |
|
"eval_steps_per_second": 11.991, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 7.9748427672955975, |
|
"grad_norm": 0.1520700752735138, |
|
"learning_rate": 6.154504729772188e-06, |
|
"loss": 0.0336, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9354838709677419, |
|
"eval_loss": 0.021294621750712395, |
|
"eval_runtime": 5.4685, |
|
"eval_samples_per_second": 566.883, |
|
"eval_steps_per_second": 11.886, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 8.971698113207546, |
|
"grad_norm": 0.2087375968694687, |
|
"learning_rate": 3.1250357867010953e-06, |
|
"loss": 0.0322, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9354838709677419, |
|
"eval_loss": 0.020687058568000793, |
|
"eval_runtime": 5.4663, |
|
"eval_samples_per_second": 567.112, |
|
"eval_steps_per_second": 11.891, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 9.968553459119496, |
|
"grad_norm": 0.14469225704669952, |
|
"learning_rate": 9.556684363000292e-08, |
|
"loss": 0.0314, |
|
"step": 3170 |
|
} |
|
], |
|
"logging_steps": 317, |
|
"max_steps": 3180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000000000.0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 825254092458012.0, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.0478722237588074, |
|
"learning_rate": 3.0390256274340926e-05, |
|
"lr_scheduler_type": "linear", |
|
"num_train_epochs": 10, |
|
"temperature": 7.533742821161418, |
|
"weight_decay": 0.25236012891933407 |
|
} |
|
} |
|
|