{ "best_metric": 0.675777792930603, "best_model_checkpoint": "trainer/checkpoint-372428", "epoch": 50.0, "eval_steps": 500, "global_step": 396200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 2e-05, "loss": 2.2513, "step": 7924 }, { "epoch": 1.0, "eval_loss": 1.2939835786819458, "eval_runtime": 37.6044, "eval_samples_per_second": 749.168, "eval_steps_per_second": 23.428, "step": 7924 }, { "epoch": 2.0, "learning_rate": 4e-05, "loss": 1.3752, "step": 15848 }, { "epoch": 2.0, "eval_loss": 1.1416432857513428, "eval_runtime": 37.8169, "eval_samples_per_second": 744.958, "eval_steps_per_second": 23.296, "step": 15848 }, { "epoch": 3.0, "learning_rate": 4.9473684210526315e-05, "loss": 1.2436, "step": 23772 }, { "epoch": 3.0, "eval_loss": 1.0676215887069702, "eval_runtime": 37.7184, "eval_samples_per_second": 746.904, "eval_steps_per_second": 23.357, "step": 23772 }, { "epoch": 4.0, "learning_rate": 4.842105263157895e-05, "loss": 1.1458, "step": 31696 }, { "epoch": 4.0, "eval_loss": 0.9978280067443848, "eval_runtime": 36.3705, "eval_samples_per_second": 774.584, "eval_steps_per_second": 24.223, "step": 31696 }, { "epoch": 5.0, "learning_rate": 4.736842105263158e-05, "loss": 1.0841, "step": 39620 }, { "epoch": 5.0, "eval_loss": 0.9508912563323975, "eval_runtime": 36.5102, "eval_samples_per_second": 771.62, "eval_steps_per_second": 24.13, "step": 39620 }, { "epoch": 6.0, "learning_rate": 4.6315789473684214e-05, "loss": 1.0386, "step": 47544 }, { "epoch": 6.0, "eval_loss": 0.9356514811515808, "eval_runtime": 36.323, "eval_samples_per_second": 775.598, "eval_steps_per_second": 24.255, "step": 47544 }, { "epoch": 7.0, "learning_rate": 4.5263157894736846e-05, "loss": 0.9989, "step": 55468 }, { "epoch": 7.0, "eval_loss": 0.9180737137794495, "eval_runtime": 36.4427, "eval_samples_per_second": 773.05, "eval_steps_per_second": 24.175, "step": 55468 }, { "epoch": 8.0, "learning_rate": 4.421052631578947e-05, "loss": 0.9686, "step": 63392 }, { "epoch": 8.0, "eval_loss": 0.8952454328536987, "eval_runtime": 36.1822, "eval_samples_per_second": 778.615, "eval_steps_per_second": 24.349, "step": 63392 }, { "epoch": 9.0, "learning_rate": 4.3157894736842105e-05, "loss": 0.9426, "step": 71316 }, { "epoch": 9.0, "eval_loss": 0.8876378536224365, "eval_runtime": 36.5182, "eval_samples_per_second": 771.451, "eval_steps_per_second": 24.125, "step": 71316 }, { "epoch": 10.0, "learning_rate": 4.210526315789474e-05, "loss": 0.9198, "step": 79240 }, { "epoch": 10.0, "eval_loss": 0.8818822503089905, "eval_runtime": 36.3065, "eval_samples_per_second": 775.949, "eval_steps_per_second": 24.266, "step": 79240 }, { "epoch": 11.0, "learning_rate": 4.105263157894737e-05, "loss": 0.9053, "step": 87164 }, { "epoch": 11.0, "eval_loss": 0.8349147439002991, "eval_runtime": 36.3771, "eval_samples_per_second": 774.444, "eval_steps_per_second": 24.219, "step": 87164 }, { "epoch": 12.0, "learning_rate": 4e-05, "loss": 0.8881, "step": 95088 }, { "epoch": 12.0, "eval_loss": 0.8407663702964783, "eval_runtime": 36.4047, "eval_samples_per_second": 773.857, "eval_steps_per_second": 24.2, "step": 95088 }, { "epoch": 13.0, "learning_rate": 3.894736842105263e-05, "loss": 0.8704, "step": 103012 }, { "epoch": 13.0, "eval_loss": 0.8339666128158569, "eval_runtime": 36.314, "eval_samples_per_second": 775.79, "eval_steps_per_second": 24.261, "step": 103012 }, { "epoch": 14.0, "learning_rate": 3.789473684210527e-05, "loss": 0.8533, "step": 110936 }, { "epoch": 14.0, "eval_loss": 0.8264057636260986, "eval_runtime": 36.2648, "eval_samples_per_second": 776.841, "eval_steps_per_second": 24.294, "step": 110936 }, { "epoch": 15.0, "learning_rate": 3.6842105263157895e-05, "loss": 0.8418, "step": 118860 }, { "epoch": 15.0, "eval_loss": 0.8100990653038025, "eval_runtime": 36.5073, "eval_samples_per_second": 771.682, "eval_steps_per_second": 24.132, "step": 118860 }, { "epoch": 16.0, "learning_rate": 3.578947368421053e-05, "loss": 0.8307, "step": 126784 }, { "epoch": 16.0, "eval_loss": 0.8106787800788879, "eval_runtime": 36.2767, "eval_samples_per_second": 776.586, "eval_steps_per_second": 24.286, "step": 126784 }, { "epoch": 17.0, "learning_rate": 3.473684210526316e-05, "loss": 0.815, "step": 134708 }, { "epoch": 17.0, "eval_loss": 0.7991083264350891, "eval_runtime": 36.2711, "eval_samples_per_second": 776.707, "eval_steps_per_second": 24.289, "step": 134708 }, { "epoch": 18.0, "learning_rate": 3.368421052631579e-05, "loss": 0.8015, "step": 142632 }, { "epoch": 18.0, "eval_loss": 0.7952774167060852, "eval_runtime": 37.5556, "eval_samples_per_second": 750.142, "eval_steps_per_second": 23.459, "step": 142632 }, { "epoch": 19.0, "learning_rate": 3.2631578947368426e-05, "loss": 0.7894, "step": 150556 }, { "epoch": 19.0, "eval_loss": 0.7720882296562195, "eval_runtime": 36.9729, "eval_samples_per_second": 761.964, "eval_steps_per_second": 23.828, "step": 150556 }, { "epoch": 20.0, "learning_rate": 3.157894736842105e-05, "loss": 0.7789, "step": 158480 }, { "epoch": 20.0, "eval_loss": 0.7802249789237976, "eval_runtime": 37.1298, "eval_samples_per_second": 758.744, "eval_steps_per_second": 23.728, "step": 158480 }, { "epoch": 21.0, "learning_rate": 3.0526315789473684e-05, "loss": 0.7678, "step": 166404 }, { "epoch": 21.0, "eval_loss": 0.7610885500907898, "eval_runtime": 36.2573, "eval_samples_per_second": 777.002, "eval_steps_per_second": 24.299, "step": 166404 }, { "epoch": 22.0, "learning_rate": 2.9473684210526314e-05, "loss": 0.7534, "step": 174328 }, { "epoch": 22.0, "eval_loss": 0.782088577747345, "eval_runtime": 37.4893, "eval_samples_per_second": 751.467, "eval_steps_per_second": 23.5, "step": 174328 }, { "epoch": 23.0, "learning_rate": 2.842105263157895e-05, "loss": 0.7502, "step": 182252 }, { "epoch": 23.0, "eval_loss": 0.7673630714416504, "eval_runtime": 36.6312, "eval_samples_per_second": 769.07, "eval_steps_per_second": 24.051, "step": 182252 }, { "epoch": 24.0, "learning_rate": 2.7368421052631583e-05, "loss": 0.7345, "step": 190176 }, { "epoch": 24.0, "eval_loss": 0.7627705335617065, "eval_runtime": 37.0149, "eval_samples_per_second": 761.099, "eval_steps_per_second": 23.801, "step": 190176 }, { "epoch": 25.0, "learning_rate": 2.6315789473684212e-05, "loss": 0.7264, "step": 198100 }, { "epoch": 25.0, "eval_loss": 0.7561437487602234, "eval_runtime": 37.2677, "eval_samples_per_second": 755.937, "eval_steps_per_second": 23.64, "step": 198100 }, { "epoch": 26.0, "learning_rate": 2.5263157894736845e-05, "loss": 0.7142, "step": 206024 }, { "epoch": 26.0, "eval_loss": 0.7509896159172058, "eval_runtime": 37.3161, "eval_samples_per_second": 754.956, "eval_steps_per_second": 23.609, "step": 206024 }, { "epoch": 27.0, "learning_rate": 2.4210526315789474e-05, "loss": 0.7018, "step": 213948 }, { "epoch": 27.0, "eval_loss": 0.7464780807495117, "eval_runtime": 36.7538, "eval_samples_per_second": 766.505, "eval_steps_per_second": 23.97, "step": 213948 }, { "epoch": 28.0, "learning_rate": 2.3157894736842107e-05, "loss": 0.6897, "step": 221872 }, { "epoch": 28.0, "eval_loss": 0.7344001531600952, "eval_runtime": 37.0652, "eval_samples_per_second": 760.066, "eval_steps_per_second": 23.769, "step": 221872 }, { "epoch": 29.0, "learning_rate": 2.2105263157894736e-05, "loss": 0.682, "step": 229796 }, { "epoch": 29.0, "eval_loss": 0.7429642081260681, "eval_runtime": 37.2732, "eval_samples_per_second": 755.824, "eval_steps_per_second": 23.636, "step": 229796 }, { "epoch": 30.0, "learning_rate": 2.105263157894737e-05, "loss": 0.6754, "step": 237720 }, { "epoch": 30.0, "eval_loss": 0.7481978535652161, "eval_runtime": 37.0222, "eval_samples_per_second": 760.949, "eval_steps_per_second": 23.797, "step": 237720 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 0.6679, "step": 245644 }, { "epoch": 31.0, "eval_loss": 0.7224923968315125, "eval_runtime": 37.2439, "eval_samples_per_second": 756.419, "eval_steps_per_second": 23.655, "step": 245644 }, { "epoch": 32.0, "learning_rate": 1.8947368421052634e-05, "loss": 0.6566, "step": 253568 }, { "epoch": 32.0, "eval_loss": 0.715844452381134, "eval_runtime": 36.5457, "eval_samples_per_second": 770.871, "eval_steps_per_second": 24.107, "step": 253568 }, { "epoch": 33.0, "learning_rate": 1.7894736842105264e-05, "loss": 0.6492, "step": 261492 }, { "epoch": 33.0, "eval_loss": 0.7234057188034058, "eval_runtime": 36.3822, "eval_samples_per_second": 774.335, "eval_steps_per_second": 24.215, "step": 261492 }, { "epoch": 34.0, "learning_rate": 1.6842105263157896e-05, "loss": 0.642, "step": 269416 }, { "epoch": 34.0, "eval_loss": 0.7132413983345032, "eval_runtime": 36.3793, "eval_samples_per_second": 774.396, "eval_steps_per_second": 24.217, "step": 269416 }, { "epoch": 35.0, "learning_rate": 1.5789473684210526e-05, "loss": 0.6342, "step": 277340 }, { "epoch": 35.0, "eval_loss": 0.7007443904876709, "eval_runtime": 36.3032, "eval_samples_per_second": 776.02, "eval_steps_per_second": 24.268, "step": 277340 }, { "epoch": 36.0, "learning_rate": 1.4736842105263157e-05, "loss": 0.6236, "step": 285264 }, { "epoch": 36.0, "eval_loss": 0.69706791639328, "eval_runtime": 37.0755, "eval_samples_per_second": 759.854, "eval_steps_per_second": 23.762, "step": 285264 }, { "epoch": 37.0, "learning_rate": 1.3684210526315791e-05, "loss": 0.6146, "step": 293188 }, { "epoch": 37.0, "eval_loss": 0.6900755167007446, "eval_runtime": 36.5007, "eval_samples_per_second": 771.822, "eval_steps_per_second": 24.137, "step": 293188 }, { "epoch": 38.0, "learning_rate": 1.2631578947368422e-05, "loss": 0.6087, "step": 301112 }, { "epoch": 38.0, "eval_loss": 0.6962341666221619, "eval_runtime": 36.6631, "eval_samples_per_second": 768.402, "eval_steps_per_second": 24.03, "step": 301112 }, { "epoch": 39.0, "learning_rate": 1.1578947368421053e-05, "loss": 0.5989, "step": 309036 }, { "epoch": 39.0, "eval_loss": 0.7045713067054749, "eval_runtime": 36.6758, "eval_samples_per_second": 768.136, "eval_steps_per_second": 24.021, "step": 309036 }, { "epoch": 40.0, "learning_rate": 1.0526315789473684e-05, "loss": 0.5924, "step": 316960 }, { "epoch": 40.0, "eval_loss": 0.6984645128250122, "eval_runtime": 36.7394, "eval_samples_per_second": 766.807, "eval_steps_per_second": 23.98, "step": 316960 }, { "epoch": 41.0, "learning_rate": 9.473684210526317e-06, "loss": 0.5827, "step": 324884 }, { "epoch": 41.0, "eval_loss": 0.6994604468345642, "eval_runtime": 36.8305, "eval_samples_per_second": 764.91, "eval_steps_per_second": 23.92, "step": 324884 }, { "epoch": 42.0, "learning_rate": 8.421052631578948e-06, "loss": 0.5731, "step": 332808 }, { "epoch": 42.0, "eval_loss": 0.6827645301818848, "eval_runtime": 36.7809, "eval_samples_per_second": 765.941, "eval_steps_per_second": 23.953, "step": 332808 }, { "epoch": 43.0, "learning_rate": 7.3684210526315784e-06, "loss": 0.5718, "step": 340732 }, { "epoch": 43.0, "eval_loss": 0.7020975947380066, "eval_runtime": 36.6627, "eval_samples_per_second": 768.411, "eval_steps_per_second": 24.03, "step": 340732 }, { "epoch": 44.0, "learning_rate": 6.315789473684211e-06, "loss": 0.5663, "step": 348656 }, { "epoch": 44.0, "eval_loss": 0.6774910092353821, "eval_runtime": 36.8941, "eval_samples_per_second": 763.59, "eval_steps_per_second": 23.879, "step": 348656 }, { "epoch": 45.0, "learning_rate": 5.263157894736842e-06, "loss": 0.5575, "step": 356580 }, { "epoch": 45.0, "eval_loss": 0.683965265750885, "eval_runtime": 36.8405, "eval_samples_per_second": 764.703, "eval_steps_per_second": 23.914, "step": 356580 }, { "epoch": 46.0, "learning_rate": 4.210526315789474e-06, "loss": 0.5524, "step": 364504 }, { "epoch": 46.0, "eval_loss": 0.6812577247619629, "eval_runtime": 36.3196, "eval_samples_per_second": 775.669, "eval_steps_per_second": 24.257, "step": 364504 }, { "epoch": 47.0, "learning_rate": 3.1578947368421056e-06, "loss": 0.5499, "step": 372428 }, { "epoch": 47.0, "eval_loss": 0.675777792930603, "eval_runtime": 36.282, "eval_samples_per_second": 776.473, "eval_steps_per_second": 24.282, "step": 372428 }, { "epoch": 48.0, "learning_rate": 2.105263157894737e-06, "loss": 0.541, "step": 380352 }, { "epoch": 48.0, "eval_loss": 0.678913950920105, "eval_runtime": 36.4409, "eval_samples_per_second": 773.087, "eval_steps_per_second": 24.176, "step": 380352 }, { "epoch": 49.0, "learning_rate": 1.0526315789473685e-06, "loss": 0.5372, "step": 388276 }, { "epoch": 49.0, "eval_loss": 0.6796761155128479, "eval_runtime": 36.5663, "eval_samples_per_second": 770.435, "eval_steps_per_second": 24.093, "step": 388276 }, { "epoch": 50.0, "learning_rate": 0.0, "loss": 0.535, "step": 396200 }, { "epoch": 50.0, "eval_loss": 0.6791965365409851, "eval_runtime": 36.555, "eval_samples_per_second": 770.674, "eval_steps_per_second": 24.101, "step": 396200 } ], "logging_steps": 500, "max_steps": 396200, "num_train_epochs": 50, "save_steps": 500, "total_flos": 7.079826371258392e+17, "trial_name": null, "trial_params": null }