|
{ |
|
"best_metric": 0.2633977234363556, |
|
"best_model_checkpoint": "saved_model/checkpoint-14890", |
|
"epoch": 0.9999664215439374, |
|
"eval_steps": 500, |
|
"global_step": 14890, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.986568166554735e-05, |
|
"loss": 0.3718, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9731363331094694e-05, |
|
"loss": 0.3007, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.959704499664204e-05, |
|
"loss": 0.2808, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.946272666218939e-05, |
|
"loss": 0.2593, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9328408327736737e-05, |
|
"loss": 0.2444, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9194089993284085e-05, |
|
"loss": 0.2381, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9059771658831433e-05, |
|
"loss": 0.2374, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.892545332437878e-05, |
|
"loss": 0.2278, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8791134989926128e-05, |
|
"loss": 0.2193, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8656816655473473e-05, |
|
"loss": 0.219, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.852249832102082e-05, |
|
"loss": 0.2241, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8388179986568168e-05, |
|
"loss": 0.2268, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8253861652115516e-05, |
|
"loss": 0.2135, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8119543317662864e-05, |
|
"loss": 0.2164, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.798522498321021e-05, |
|
"loss": 0.214, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.785090664875756e-05, |
|
"loss": 0.2042, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7716588314304904e-05, |
|
"loss": 0.2031, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.758226997985225e-05, |
|
"loss": 0.2045, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.74479516453996e-05, |
|
"loss": 0.1993, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7313633310946947e-05, |
|
"loss": 0.2012, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.717931497649429e-05, |
|
"loss": 0.1971, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.704499664204164e-05, |
|
"loss": 0.1918, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6910678307588987e-05, |
|
"loss": 0.1972, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.6776359973136335e-05, |
|
"loss": 0.1855, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6642041638683682e-05, |
|
"loss": 0.1873, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.650772330423103e-05, |
|
"loss": 0.1807, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6373404969778378e-05, |
|
"loss": 0.1842, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6239086635325722e-05, |
|
"loss": 0.1968, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.610476830087307e-05, |
|
"loss": 0.1786, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9212894560107455, |
|
"eval_loss": 0.2633977234363556, |
|
"eval_macro_f1": 0.9043313763923932, |
|
"eval_runtime": 742.0543, |
|
"eval_samples_per_second": 80.264, |
|
"eval_steps_per_second": 20.066, |
|
"step": 14890 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 74450, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 6.268568271740928e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|