|
{ |
|
"best_metric": 0.27896466851234436, |
|
"best_model_checkpoint": "out/checkpoint-900", |
|
"epoch": 0.8419083255378859, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.3386548161506653, |
|
"eval_runtime": 3.4462, |
|
"eval_samples_per_second": 152.924, |
|
"eval_steps_per_second": 19.152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.32248201966285706, |
|
"eval_runtime": 3.2636, |
|
"eval_samples_per_second": 161.477, |
|
"eval_steps_per_second": 20.223, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.3344081938266754, |
|
"eval_runtime": 3.3846, |
|
"eval_samples_per_second": 155.706, |
|
"eval_steps_per_second": 19.5, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.37741541862487793, |
|
"eval_runtime": 3.4216, |
|
"eval_samples_per_second": 154.02, |
|
"eval_steps_per_second": 19.289, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.9799095266062985e-06, |
|
"loss": 0.3934, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.41931670904159546, |
|
"eval_runtime": 3.2614, |
|
"eval_samples_per_second": 161.586, |
|
"eval_steps_per_second": 20.237, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.3693557381629944, |
|
"eval_runtime": 3.2679, |
|
"eval_samples_per_second": 161.266, |
|
"eval_steps_per_second": 20.196, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.34475621581077576, |
|
"eval_runtime": 3.2639, |
|
"eval_samples_per_second": 161.465, |
|
"eval_steps_per_second": 20.221, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.34064173698425293, |
|
"eval_runtime": 3.2698, |
|
"eval_samples_per_second": 161.174, |
|
"eval_steps_per_second": 20.185, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.27896466851234436, |
|
"eval_runtime": 3.2598, |
|
"eval_samples_per_second": 161.668, |
|
"eval_steps_per_second": 20.247, |
|
"step": 900 |
|
} |
|
], |
|
"max_steps": 2138, |
|
"num_train_epochs": 2, |
|
"total_flos": 148483879153632.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|