{ "best_metric": 0.7850534531405503, "best_model_checkpoint": "test-klue/ynat/run-3/checkpoint-500", "epoch": 0.8756567425569177, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 3.851823303957499e-06, "loss": 1.934, "step": 50 }, { "epoch": 0.09, "eval_f1": 0.01705140497086806, "eval_loss": 2.002474546432495, "eval_runtime": 12.8723, "eval_samples_per_second": 707.486, "eval_steps_per_second": 1.398, "step": 50 }, { "epoch": 0.18, "learning_rate": 3.783163352015298e-06, "loss": 1.9174, "step": 100 }, { "epoch": 0.18, "eval_f1": 0.16773144497786774, "eval_loss": 1.9946870803833008, "eval_runtime": 13.4529, "eval_samples_per_second": 676.953, "eval_steps_per_second": 1.338, "step": 100 }, { "epoch": 0.26, "learning_rate": 3.7145034000730966e-06, "loss": 1.6984, "step": 150 }, { "epoch": 0.26, "eval_f1": 0.579749057690188, "eval_loss": 1.6373496055603027, "eval_runtime": 13.0114, "eval_samples_per_second": 699.924, "eval_steps_per_second": 1.383, "step": 150 }, { "epoch": 0.35, "learning_rate": 3.645843448130895e-06, "loss": 1.2585, "step": 200 }, { "epoch": 0.35, "eval_f1": 0.6734776928120875, "eval_loss": 1.2733124494552612, "eval_runtime": 12.6224, "eval_samples_per_second": 721.493, "eval_steps_per_second": 1.426, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.5771834961886936e-06, "loss": 0.9487, "step": 250 }, { "epoch": 0.44, "eval_f1": 0.6814953813845488, "eval_loss": 1.1104496717453003, "eval_runtime": 12.492, "eval_samples_per_second": 729.025, "eval_steps_per_second": 1.441, "step": 250 }, { "epoch": 0.53, "learning_rate": 3.5085235442464923e-06, "loss": 0.8072, "step": 300 }, { "epoch": 0.53, "eval_f1": 0.6717305786202503, "eval_loss": 1.0609983205795288, "eval_runtime": 12.6873, "eval_samples_per_second": 717.806, "eval_steps_per_second": 1.419, "step": 300 }, { "epoch": 0.61, "learning_rate": 3.439863592304291e-06, "loss": 0.7549, "step": 350 }, { "epoch": 0.61, "eval_f1": 0.7228793656517608, "eval_loss": 0.9494264721870422, "eval_runtime": 12.8373, "eval_samples_per_second": 709.416, "eval_steps_per_second": 1.402, "step": 350 }, { "epoch": 0.7, "learning_rate": 3.3712036403620893e-06, "loss": 0.6644, "step": 400 }, { "epoch": 0.7, "eval_f1": 0.7719549711033966, "eval_loss": 0.8139678239822388, "eval_runtime": 12.9124, "eval_samples_per_second": 705.29, "eval_steps_per_second": 1.394, "step": 400 }, { "epoch": 0.79, "learning_rate": 3.302543688419888e-06, "loss": 0.6306, "step": 450 }, { "epoch": 0.79, "eval_f1": 0.7738311952589042, "eval_loss": 0.7844408750534058, "eval_runtime": 12.8236, "eval_samples_per_second": 710.174, "eval_steps_per_second": 1.404, "step": 450 }, { "epoch": 0.88, "learning_rate": 3.2338837364776867e-06, "loss": 0.6093, "step": 500 }, { "epoch": 0.88, "eval_f1": 0.7850534531405503, "eval_loss": 0.7450836300849915, "eval_runtime": 12.7168, "eval_samples_per_second": 716.14, "eval_steps_per_second": 1.415, "step": 500 } ], "logging_steps": 50, "max_steps": 2855, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 40829268658800.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 3.851823303957499e-06, "num_train_epochs": 5, "per_device_train_batch_size": 8, "seed": 30 } }