{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.973293768545994, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 8e-08, "loss": 0.6853, "step": 10 }, { "epoch": 0.12, "learning_rate": 1.6e-07, "loss": 0.6856, "step": 20 }, { "epoch": 0.18, "learning_rate": 2.4e-07, "loss": 0.6901, "step": 30 }, { "epoch": 0.24, "learning_rate": 3.2e-07, "loss": 0.6933, "step": 40 }, { "epoch": 0.3, "learning_rate": 4e-07, "loss": 0.6836, "step": 50 }, { "epoch": 0.36, "learning_rate": 4.8e-07, "loss": 0.6823, "step": 60 }, { "epoch": 0.42, "learning_rate": 5.6e-07, "loss": 0.6894, "step": 70 }, { "epoch": 0.47, "learning_rate": 6.4e-07, "loss": 0.6857, "step": 80 }, { "epoch": 0.53, "learning_rate": 7.2e-07, "loss": 0.6775, "step": 90 }, { "epoch": 0.59, "learning_rate": 8e-07, "loss": 0.6846, "step": 100 }, { "epoch": 0.65, "learning_rate": 8.799999999999999e-07, "loss": 0.68, "step": 110 }, { "epoch": 0.71, "learning_rate": 9.6e-07, "loss": 0.6751, "step": 120 }, { "epoch": 0.77, "learning_rate": 1.04e-06, "loss": 0.6803, "step": 130 }, { "epoch": 0.83, "learning_rate": 1.12e-06, "loss": 0.6812, "step": 140 }, { "epoch": 0.89, "learning_rate": 1.2e-06, "loss": 0.6866, "step": 150 }, { "epoch": 0.95, "learning_rate": 1.28e-06, "loss": 0.667, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.5742942050520059, "eval_f1": 0.364794714487966, "eval_loss": 0.6554083824157715, "eval_precision": 0.28714710252600295, "eval_recall": 0.5, "eval_runtime": 10.606, "eval_samples_per_second": 253.819, "step": 168 }, { "epoch": 1.01, "learning_rate": 1.3600000000000001e-06, "loss": 0.6922, "step": 170 }, { "epoch": 1.07, "learning_rate": 1.44e-06, "loss": 0.64, "step": 180 }, { "epoch": 1.13, "learning_rate": 1.5199999999999998e-06, "loss": 0.5834, "step": 190 }, { "epoch": 1.19, "learning_rate": 1.6e-06, "loss": 0.4636, "step": 200 }, { "epoch": 1.25, "learning_rate": 1.6799999999999998e-06, "loss": 0.3717, "step": 210 }, { "epoch": 1.31, "learning_rate": 1.7599999999999999e-06, "loss": 0.3508, "step": 220 }, { "epoch": 1.37, "learning_rate": 1.84e-06, "loss": 0.2926, "step": 230 }, { "epoch": 1.43, "learning_rate": 1.92e-06, "loss": 0.2967, "step": 240 }, { "epoch": 1.49, "learning_rate": 2e-06, "loss": 0.2973, "step": 250 }, { "epoch": 1.55, "learning_rate": 2.08e-06, "loss": 0.2747, "step": 260 }, { "epoch": 1.61, "learning_rate": 2.16e-06, "loss": 0.3057, "step": 270 }, { "epoch": 1.66, "learning_rate": 2.24e-06, "loss": 0.284, "step": 280 }, { "epoch": 1.72, "learning_rate": 2.32e-06, "loss": 0.3063, "step": 290 }, { "epoch": 1.78, "learning_rate": 2.4e-06, "loss": 0.2705, "step": 300 }, { "epoch": 1.84, "learning_rate": 2.48e-06, "loss": 0.2598, "step": 310 }, { "epoch": 1.9, "learning_rate": 2.56e-06, "loss": 0.246, "step": 320 }, { "epoch": 1.96, "learning_rate": 2.64e-06, "loss": 0.2556, "step": 330 }, { "epoch": 2.0, "eval_accuracy": 0.8904160475482912, "eval_f1": 0.8873658539006881, "eval_loss": 0.27534565329551697, "eval_precision": 0.889704946803928, "eval_recall": 0.885514947090843, "eval_runtime": 10.6052, "eval_samples_per_second": 253.837, "step": 336 }, { "epoch": 2.02, "learning_rate": 2.7200000000000002e-06, "loss": 0.2643, "step": 340 }, { "epoch": 2.08, "learning_rate": 2.8e-06, "loss": 0.2734, "step": 350 }, { "epoch": 2.14, "learning_rate": 2.88e-06, "loss": 0.2294, "step": 360 }, { "epoch": 2.2, "learning_rate": 2.96e-06, "loss": 0.2289, "step": 370 }, { "epoch": 2.26, "learning_rate": 3.0399999999999997e-06, "loss": 0.2494, "step": 380 }, { "epoch": 2.32, "learning_rate": 3.1199999999999998e-06, "loss": 0.222, "step": 390 }, { "epoch": 2.38, "learning_rate": 3.2e-06, "loss": 0.2603, "step": 400 }, { "epoch": 2.44, "learning_rate": 3.2799999999999995e-06, "loss": 0.265, "step": 410 }, { "epoch": 2.5, "learning_rate": 3.3599999999999996e-06, "loss": 0.2482, "step": 420 }, { "epoch": 2.56, "learning_rate": 3.4399999999999997e-06, "loss": 0.2623, "step": 430 }, { "epoch": 2.62, "learning_rate": 3.5199999999999998e-06, "loss": 0.2411, "step": 440 }, { "epoch": 2.68, "learning_rate": 3.6e-06, "loss": 0.2331, "step": 450 }, { "epoch": 2.74, "learning_rate": 3.68e-06, "loss": 0.2301, "step": 460 }, { "epoch": 2.8, "learning_rate": 3.7599999999999996e-06, "loss": 0.2105, "step": 470 }, { "epoch": 2.85, "learning_rate": 3.84e-06, "loss": 0.2282, "step": 480 }, { "epoch": 2.91, "learning_rate": 3.92e-06, "loss": 0.2357, "step": 490 }, { "epoch": 2.97, "learning_rate": 4e-06, "loss": 0.2191, "step": 500 } ], "max_steps": 840, "num_train_epochs": 5, "total_flos": 7162006012003440, "trial_name": null, "trial_params": null }