|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9910464083111202, |
|
"global_step": 14500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.977217323946871e-05, |
|
"loss": 3.4122, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9544346478937417e-05, |
|
"loss": 3.3784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9316519718406125e-05, |
|
"loss": 3.3481, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9088692957874832e-05, |
|
"loss": 3.3364, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.886086619734354e-05, |
|
"loss": 3.3282, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.863303943681225e-05, |
|
"loss": 3.3188, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.840521267628096e-05, |
|
"loss": 3.3164, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8177385915749666e-05, |
|
"loss": 3.3237, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7949559155218374e-05, |
|
"loss": 3.3041, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.772173239468708e-05, |
|
"loss": 3.2969, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.749390563415579e-05, |
|
"loss": 3.3079, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7266078873624497e-05, |
|
"loss": 3.2821, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7038252113093204e-05, |
|
"loss": 3.2939, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6810425352561915e-05, |
|
"loss": 3.2861, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.658259859203062e-05, |
|
"loss": 3.2938, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.635477183149933e-05, |
|
"loss": 3.2831, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6126945070968035e-05, |
|
"loss": 3.2811, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5899118310436746e-05, |
|
"loss": 3.2798, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5671291549905454e-05, |
|
"loss": 3.2675, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.544346478937416e-05, |
|
"loss": 3.2738, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5215638028842869e-05, |
|
"loss": 3.2673, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4987811268311577e-05, |
|
"loss": 3.2651, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4759984507780286e-05, |
|
"loss": 3.2829, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4532157747248993e-05, |
|
"loss": 3.2623, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4304330986717701e-05, |
|
"loss": 3.2607, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4076504226186409e-05, |
|
"loss": 3.2648, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3848677465655116e-05, |
|
"loss": 3.2718, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3620850705123824e-05, |
|
"loss": 3.2582, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3393023944592533e-05, |
|
"loss": 3.2499, |
|
"step": 14500 |
|
} |
|
], |
|
"max_steps": 43893, |
|
"num_train_epochs": 3, |
|
"total_flos": 7297427570688000, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|