|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.05318588730911, |
|
"eval_steps": 50, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8554456233978271, |
|
"eval_runtime": 2143.5349, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.8419370651245117, |
|
"eval_runtime": 2143.6574, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.832232117652893, |
|
"eval_runtime": 2143.9117, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.8266302347183228, |
|
"eval_runtime": 2144.0794, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8243311643600464, |
|
"eval_runtime": 2144.1306, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8222966194152832, |
|
"eval_runtime": 2144.2111, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8207086324691772, |
|
"eval_runtime": 2143.8422, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8194689750671387, |
|
"eval_runtime": 2143.9601, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.074, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.818217158317566, |
|
"eval_runtime": 2142.9482, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.0050251256281408e-05, |
|
"loss": 1.8371, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8166958093643188, |
|
"eval_runtime": 2143.0693, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8156176805496216, |
|
"eval_runtime": 2143.2912, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.8143000602722168, |
|
"eval_runtime": 2143.0457, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.813321590423584, |
|
"eval_runtime": 2142.6086, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8123859167099, |
|
"eval_runtime": 2142.7684, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.81136155128479, |
|
"eval_runtime": 2143.3216, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8105697631835938, |
|
"eval_runtime": 2142.989, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8099677562713623, |
|
"eval_runtime": 2142.6436, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.809475302696228, |
|
"eval_runtime": 2142.6825, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.8091450929641724, |
|
"eval_runtime": 2142.5605, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0, |
|
"loss": 1.8001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.809110403060913, |
|
"eval_runtime": 2142.8135, |
|
"eval_samples_per_second": 2.364, |
|
"eval_steps_per_second": 0.074, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"total_flos": 7.15390604279808e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|