{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 2250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.0009111111111111111, "loss": 0.2592, "step": 200 }, { "epoch": 0.36, "learning_rate": 0.0008222222222222222, "loss": 0.2127, "step": 400 }, { "epoch": 0.44, "eval_loss": 0.3103792071342468, "eval_macro-f1": 0.3107897037355991, "eval_micro-f1": 0.44178454842219805, "eval_runtime": 16.3051, "eval_samples_per_second": 61.331, "eval_steps_per_second": 7.666, "step": 500 }, { "epoch": 0.53, "learning_rate": 0.0007333333333333333, "loss": 0.201, "step": 600 }, { "epoch": 0.71, "learning_rate": 0.0006444444444444444, "loss": 0.1913, "step": 800 }, { "epoch": 0.89, "learning_rate": 0.0005555555555555556, "loss": 0.1808, "step": 1000 }, { "epoch": 0.89, "eval_loss": 0.2961287200450897, "eval_macro-f1": 0.3452123032410471, "eval_micro-f1": 0.5021520803443329, "eval_runtime": 17.2348, "eval_samples_per_second": 58.022, "eval_steps_per_second": 7.253, "step": 1000 }, { "epoch": 1.07, "learning_rate": 0.00046666666666666666, "loss": 0.1758, "step": 1200 }, { "epoch": 1.24, "learning_rate": 0.00037777777777777777, "loss": 0.1762, "step": 1400 }, { "epoch": 1.33, "eval_loss": 0.2807115316390991, "eval_macro-f1": 0.3945425420350508, "eval_micro-f1": 0.527331189710611, "eval_runtime": 14.5084, "eval_samples_per_second": 68.925, "eval_steps_per_second": 8.616, "step": 1500 }, { "epoch": 1.42, "learning_rate": 0.0002888888888888889, "loss": 0.1698, "step": 1600 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.1622, "step": 1800 }, { "epoch": 1.78, "learning_rate": 0.0001111111111111111, "loss": 0.16, "step": 2000 }, { "epoch": 1.78, "eval_loss": 0.2669467031955719, "eval_macro-f1": 0.4193019540132512, "eval_micro-f1": 0.5572842998585573, "eval_runtime": 14.4979, "eval_samples_per_second": 68.976, "eval_steps_per_second": 8.622, "step": 2000 }, { "epoch": 1.96, "learning_rate": 2.2222222222222223e-05, "loss": 0.1648, "step": 2200 }, { "epoch": 2.0, "step": 2250, "total_flos": 717876559872000.0, "train_loss": 0.18599607192145454, "train_runtime": 356.9797, "train_samples_per_second": 50.423, "train_steps_per_second": 6.303 } ], "max_steps": 2250, "num_train_epochs": 2, "total_flos": 717876559872000.0, "trial_name": null, "trial_params": null }