|
{ |
|
"best_metric": 0.2845354974269867, |
|
"best_model_checkpoint": "outputs/checkpoint-101", |
|
"epoch": 7.724137931034483, |
|
"eval_steps": 500, |
|
"global_step": 112, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00018219165847995, |
|
"loss": 1.693, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.8468072414398193, |
|
"eval_runtime": 47.2393, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.042, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001643297311779941, |
|
"loss": 0.8001, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5586440563201904, |
|
"eval_runtime": 46.728, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.043, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00014646780387603822, |
|
"loss": 0.6269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00012860587657408234, |
|
"loss": 0.3671, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 0.33814382553100586, |
|
"eval_runtime": 48.0352, |
|
"eval_samples_per_second": 0.312, |
|
"eval_steps_per_second": 0.042, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00011074394927212647, |
|
"loss": 0.2759, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.31169143319129944, |
|
"eval_runtime": 47.7275, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.042, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 9.288202197017058e-05, |
|
"loss": 0.2149, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 7.50200946682147e-05, |
|
"loss": 0.164, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_loss": 0.29888349771499634, |
|
"eval_runtime": 47.9342, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.042, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 5.7158167366258816e-05, |
|
"loss": 0.1221, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.2934713065624237, |
|
"eval_runtime": 48.2143, |
|
"eval_samples_per_second": 0.311, |
|
"eval_steps_per_second": 0.041, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 3.929624006430294e-05, |
|
"loss": 0.1183, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 2.143431276234706e-05, |
|
"loss": 0.0866, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 0.2845354974269867, |
|
"eval_runtime": 46.7151, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.043, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 3.572385460391176e-06, |
|
"loss": 0.0837, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"eval_loss": 0.2850027084350586, |
|
"eval_runtime": 47.2985, |
|
"eval_samples_per_second": 0.317, |
|
"eval_steps_per_second": 0.042, |
|
"step": 112 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 2.546768317169664e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|