|
{ |
|
"best_metric": 0.7850534531405503, |
|
"best_model_checkpoint": "test-klue/ynat/run-3/checkpoint-500", |
|
"epoch": 0.8756567425569177, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.851823303957499e-06, |
|
"loss": 1.934, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_f1": 0.01705140497086806, |
|
"eval_loss": 2.002474546432495, |
|
"eval_runtime": 12.8723, |
|
"eval_samples_per_second": 707.486, |
|
"eval_steps_per_second": 1.398, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.783163352015298e-06, |
|
"loss": 1.9174, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_f1": 0.16773144497786774, |
|
"eval_loss": 1.9946870803833008, |
|
"eval_runtime": 13.4529, |
|
"eval_samples_per_second": 676.953, |
|
"eval_steps_per_second": 1.338, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7145034000730966e-06, |
|
"loss": 1.6984, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_f1": 0.579749057690188, |
|
"eval_loss": 1.6373496055603027, |
|
"eval_runtime": 13.0114, |
|
"eval_samples_per_second": 699.924, |
|
"eval_steps_per_second": 1.383, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.645843448130895e-06, |
|
"loss": 1.2585, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_f1": 0.6734776928120875, |
|
"eval_loss": 1.2733124494552612, |
|
"eval_runtime": 12.6224, |
|
"eval_samples_per_second": 721.493, |
|
"eval_steps_per_second": 1.426, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.5771834961886936e-06, |
|
"loss": 0.9487, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_f1": 0.6814953813845488, |
|
"eval_loss": 1.1104496717453003, |
|
"eval_runtime": 12.492, |
|
"eval_samples_per_second": 729.025, |
|
"eval_steps_per_second": 1.441, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.5085235442464923e-06, |
|
"loss": 0.8072, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_f1": 0.6717305786202503, |
|
"eval_loss": 1.0609983205795288, |
|
"eval_runtime": 12.6873, |
|
"eval_samples_per_second": 717.806, |
|
"eval_steps_per_second": 1.419, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.439863592304291e-06, |
|
"loss": 0.7549, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_f1": 0.7228793656517608, |
|
"eval_loss": 0.9494264721870422, |
|
"eval_runtime": 12.8373, |
|
"eval_samples_per_second": 709.416, |
|
"eval_steps_per_second": 1.402, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.3712036403620893e-06, |
|
"loss": 0.6644, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_f1": 0.7719549711033966, |
|
"eval_loss": 0.8139678239822388, |
|
"eval_runtime": 12.9124, |
|
"eval_samples_per_second": 705.29, |
|
"eval_steps_per_second": 1.394, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.302543688419888e-06, |
|
"loss": 0.6306, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_f1": 0.7738311952589042, |
|
"eval_loss": 0.7844408750534058, |
|
"eval_runtime": 12.8236, |
|
"eval_samples_per_second": 710.174, |
|
"eval_steps_per_second": 1.404, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.2338837364776867e-06, |
|
"loss": 0.6093, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_f1": 0.7850534531405503, |
|
"eval_loss": 0.7450836300849915, |
|
"eval_runtime": 12.7168, |
|
"eval_samples_per_second": 716.14, |
|
"eval_steps_per_second": 1.415, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2855, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 40829268658800.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 3.851823303957499e-06, |
|
"num_train_epochs": 5, |
|
"per_device_train_batch_size": 8, |
|
"seed": 30 |
|
} |
|
} |
|
|