|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.477832512315271, |
|
"eval_steps": 100, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.3109243697478994e-05, |
|
"loss": 1.0832, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.100840336134454e-05, |
|
"loss": 0.4234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.36679086089134216, |
|
"eval_runtime": 111.544, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8907563025210083e-05, |
|
"loss": 0.3757, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6806722689075634e-05, |
|
"loss": 0.3644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 0.32610517740249634, |
|
"eval_runtime": 111.5555, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.3444, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2605042016806723e-05, |
|
"loss": 0.3289, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.3098466098308563, |
|
"eval_runtime": 111.5402, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.050420168067227e-05, |
|
"loss": 0.3027, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.403361344537817e-06, |
|
"loss": 0.2973, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.2937043309211731, |
|
"eval_runtime": 111.5631, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.3025210084033615e-06, |
|
"loss": 0.3113, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.2016806722689085e-06, |
|
"loss": 0.2787, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.28651753067970276, |
|
"eval_runtime": 111.552, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.1008403361344543e-06, |
|
"loss": 0.2982, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0, |
|
"loss": 0.3138, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.28348788619041443, |
|
"eval_runtime": 111.5723, |
|
"eval_samples_per_second": 1.219, |
|
"eval_steps_per_second": 0.152, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 6.9835844772864e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|