{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.98463901689708,
  "global_step": 13000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38,
      "learning_rate": 1.923195084485407e-05,
      "loss": 2.0051,
      "step": 500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.8463901689708145e-05,
      "loss": 1.6247,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.3888643980026245,
      "eval_runtime": 7.9545,
      "eval_samples_per_second": 653.846,
      "eval_steps_per_second": 81.841,
      "step": 1302
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.7695852534562215e-05,
      "loss": 1.4928,
      "step": 1500
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.6927803379416285e-05,
      "loss": 1.3947,
      "step": 2000
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.6159754224270355e-05,
      "loss": 1.3445,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.201552152633667,
      "eval_runtime": 8.1327,
      "eval_samples_per_second": 639.513,
      "eval_steps_per_second": 80.047,
      "step": 2604
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.5391705069124425e-05,
      "loss": 1.2667,
      "step": 3000
    },
    {
      "epoch": 2.69,
      "learning_rate": 1.4623655913978497e-05,
      "loss": 1.2231,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.0806881189346313,
      "eval_runtime": 7.9883,
      "eval_samples_per_second": 651.08,
      "eval_steps_per_second": 81.495,
      "step": 3906
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.3855606758832567e-05,
      "loss": 1.2226,
      "step": 4000
    },
    {
      "epoch": 3.46,
      "learning_rate": 1.3087557603686638e-05,
      "loss": 1.1666,
      "step": 4500
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.2319508448540707e-05,
      "loss": 1.1263,
      "step": 5000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.024440050125122,
      "eval_runtime": 7.9462,
      "eval_samples_per_second": 654.524,
      "eval_steps_per_second": 81.926,
      "step": 5208
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.1551459293394778e-05,
      "loss": 1.1107,
      "step": 5500
    },
    {
      "epoch": 4.61,
      "learning_rate": 1.0783410138248848e-05,
      "loss": 1.0675,
      "step": 6000
    },
    {
      "epoch": 4.99,
      "learning_rate": 1.001536098310292e-05,
      "loss": 1.062,
      "step": 6500
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.9445285797119141,
      "eval_runtime": 8.0272,
      "eval_samples_per_second": 647.921,
      "eval_steps_per_second": 81.099,
      "step": 6510
    },
    {
      "epoch": 5.38,
      "learning_rate": 9.24731182795699e-06,
      "loss": 1.03,
      "step": 7000
    },
    {
      "epoch": 5.76,
      "learning_rate": 8.47926267281106e-06,
      "loss": 1.0055,
      "step": 7500
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.9380741715431213,
      "eval_runtime": 8.0529,
      "eval_samples_per_second": 645.851,
      "eval_steps_per_second": 80.84,
      "step": 7812
    },
    {
      "epoch": 6.14,
      "learning_rate": 7.711213517665132e-06,
      "loss": 0.9869,
      "step": 8000
    },
    {
      "epoch": 6.53,
      "learning_rate": 6.9431643625192015e-06,
      "loss": 1.0125,
      "step": 8500
    },
    {
      "epoch": 6.91,
      "learning_rate": 6.175115207373272e-06,
      "loss": 1.004,
      "step": 9000
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.902702271938324,
      "eval_runtime": 8.0927,
      "eval_samples_per_second": 642.681,
      "eval_steps_per_second": 80.443,
      "step": 9114
    },
    {
      "epoch": 7.3,
      "learning_rate": 5.407066052227343e-06,
      "loss": 0.9893,
      "step": 9500
    },
    {
      "epoch": 7.68,
      "learning_rate": 4.639016897081414e-06,
      "loss": 0.9525,
      "step": 10000
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.8645159006118774,
      "eval_runtime": 8.0556,
      "eval_samples_per_second": 645.641,
      "eval_steps_per_second": 80.814,
      "step": 10416
    },
    {
      "epoch": 8.06,
      "learning_rate": 3.870967741935484e-06,
      "loss": 0.9761,
      "step": 10500
    },
    {
      "epoch": 8.45,
      "learning_rate": 3.1029185867895553e-06,
      "loss": 0.9469,
      "step": 11000
    },
    {
      "epoch": 8.83,
      "learning_rate": 2.3348694316436257e-06,
      "loss": 0.9166,
      "step": 11500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.8442177176475525,
      "eval_runtime": 8.0728,
      "eval_samples_per_second": 644.264,
      "eval_steps_per_second": 80.641,
      "step": 11718
    },
    {
      "epoch": 9.22,
      "learning_rate": 1.5668202764976959e-06,
      "loss": 0.9201,
      "step": 12000
    },
    {
      "epoch": 9.6,
      "learning_rate": 7.987711213517666e-07,
      "loss": 0.9143,
      "step": 12500
    },
    {
      "epoch": 9.98,
      "learning_rate": 3.0721966205837177e-08,
      "loss": 0.9314,
      "step": 13000
    }
  ],
  "max_steps": 13020,
  "num_train_epochs": 10,
  "total_flos": 673236987330600.0,
  "trial_name": null,
  "trial_params": null
}