{ "best_metric": 0.2633977234363556, "best_model_checkpoint": "saved_model/checkpoint-14890", "epoch": 0.9999664215439374, "eval_steps": 500, "global_step": 14890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.986568166554735e-05, "loss": 0.3718, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.9731363331094694e-05, "loss": 0.3007, "step": 1000 }, { "epoch": 0.1, "learning_rate": 1.959704499664204e-05, "loss": 0.2808, "step": 1500 }, { "epoch": 0.13, "learning_rate": 1.946272666218939e-05, "loss": 0.2593, "step": 2000 }, { "epoch": 0.17, "learning_rate": 1.9328408327736737e-05, "loss": 0.2444, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.9194089993284085e-05, "loss": 0.2381, "step": 3000 }, { "epoch": 0.24, "learning_rate": 1.9059771658831433e-05, "loss": 0.2374, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.892545332437878e-05, "loss": 0.2278, "step": 4000 }, { "epoch": 0.3, "learning_rate": 1.8791134989926128e-05, "loss": 0.2193, "step": 4500 }, { "epoch": 0.34, "learning_rate": 1.8656816655473473e-05, "loss": 0.219, "step": 5000 }, { "epoch": 0.37, "learning_rate": 1.852249832102082e-05, "loss": 0.2241, "step": 5500 }, { "epoch": 0.4, "learning_rate": 1.8388179986568168e-05, "loss": 0.2268, "step": 6000 }, { "epoch": 0.44, "learning_rate": 1.8253861652115516e-05, "loss": 0.2135, "step": 6500 }, { "epoch": 0.47, "learning_rate": 1.8119543317662864e-05, "loss": 0.2164, "step": 7000 }, { "epoch": 0.5, "learning_rate": 1.798522498321021e-05, "loss": 0.214, "step": 7500 }, { "epoch": 0.54, "learning_rate": 1.785090664875756e-05, "loss": 0.2042, "step": 8000 }, { "epoch": 0.57, "learning_rate": 1.7716588314304904e-05, "loss": 0.2031, "step": 8500 }, { "epoch": 0.6, "learning_rate": 1.758226997985225e-05, "loss": 0.2045, "step": 9000 }, { "epoch": 0.64, "learning_rate": 1.74479516453996e-05, "loss": 0.1993, "step": 9500 }, { "epoch": 0.67, "learning_rate": 1.7313633310946947e-05, "loss": 0.2012, "step": 10000 }, { "epoch": 0.71, "learning_rate": 1.717931497649429e-05, "loss": 0.1971, "step": 10500 }, { "epoch": 0.74, "learning_rate": 1.704499664204164e-05, "loss": 0.1918, "step": 11000 }, { "epoch": 0.77, "learning_rate": 1.6910678307588987e-05, "loss": 0.1972, "step": 11500 }, { "epoch": 0.81, "learning_rate": 1.6776359973136335e-05, "loss": 0.1855, "step": 12000 }, { "epoch": 0.84, "learning_rate": 1.6642041638683682e-05, "loss": 0.1873, "step": 12500 }, { "epoch": 0.87, "learning_rate": 1.650772330423103e-05, "loss": 0.1807, "step": 13000 }, { "epoch": 0.91, "learning_rate": 1.6373404969778378e-05, "loss": 0.1842, "step": 13500 }, { "epoch": 0.94, "learning_rate": 1.6239086635325722e-05, "loss": 0.1968, "step": 14000 }, { "epoch": 0.97, "learning_rate": 1.610476830087307e-05, "loss": 0.1786, "step": 14500 }, { "epoch": 1.0, "eval_accuracy": 0.9212894560107455, "eval_loss": 0.2633977234363556, "eval_macro_f1": 0.9043313763923932, "eval_runtime": 742.0543, "eval_samples_per_second": 80.264, "eval_steps_per_second": 20.066, "step": 14890 } ], "logging_steps": 500, "max_steps": 74450, "num_train_epochs": 5, "save_steps": 500, "total_flos": 6.268568271740928e+16, "trial_name": null, "trial_params": null }