{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7971753420380452, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9957482993197282e-05, "loss": 31.2602, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.991496598639456e-05, "loss": 18.3425, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.987244897959184e-05, "loss": 11.5784, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.982993197278912e-05, "loss": 7.843, "step": 800 }, { "epoch": 0.05, "learning_rate": 1.9787414965986397e-05, "loss": 4.9763, "step": 1000 }, { "epoch": 0.06, "learning_rate": 1.9744897959183677e-05, "loss": 3.365, "step": 1200 }, { "epoch": 0.07, "learning_rate": 1.9702380952380954e-05, "loss": 2.5671, "step": 1400 }, { "epoch": 0.09, "learning_rate": 1.965986394557823e-05, "loss": 1.9747, "step": 1600 }, { "epoch": 0.1, "learning_rate": 1.961734693877551e-05, "loss": 1.528, "step": 1800 }, { "epoch": 0.11, "learning_rate": 1.9574829931972788e-05, "loss": 1.1564, "step": 2000 }, { "epoch": 0.12, "learning_rate": 1.953231292517007e-05, "loss": 0.8765, "step": 2200 }, { "epoch": 0.13, "learning_rate": 1.948979591836735e-05, "loss": 0.6853, "step": 2400 }, { "epoch": 0.14, "learning_rate": 1.9447278911564626e-05, "loss": 0.5731, "step": 2600 }, { "epoch": 0.15, "learning_rate": 1.9404761904761906e-05, "loss": 0.4927, "step": 2800 }, { "epoch": 0.16, "learning_rate": 1.9362244897959186e-05, "loss": 0.4354, "step": 3000 }, { "epoch": 0.17, "learning_rate": 1.9319727891156463e-05, "loss": 0.3948, "step": 3200 }, { "epoch": 0.18, "learning_rate": 1.9277210884353744e-05, "loss": 0.3723, "step": 3400 }, { "epoch": 0.19, "learning_rate": 1.9234693877551024e-05, "loss": 0.346, "step": 3600 }, { "epoch": 0.2, "learning_rate": 1.91921768707483e-05, "loss": 0.3295, "step": 3800 }, { "epoch": 0.21, "learning_rate": 1.914965986394558e-05, "loss": 0.3101, "step": 4000 }, { "epoch": 0.21, "eval_loss": 0.16350167989730835, "eval_runtime": 1650.684, "eval_samples_per_second": 81.06, "eval_steps_per_second": 20.266, "step": 4000 }, { "epoch": 0.22, "learning_rate": 1.910714285714286e-05, "loss": 0.2984, "step": 4200 }, { "epoch": 0.23, "learning_rate": 1.906462585034014e-05, "loss": 0.2811, "step": 4400 }, { "epoch": 0.24, "learning_rate": 1.9022108843537416e-05, "loss": 0.2704, "step": 4600 }, { "epoch": 0.26, "learning_rate": 1.8979591836734696e-05, "loss": 0.2612, "step": 4800 }, { "epoch": 0.27, "learning_rate": 1.8937074829931973e-05, "loss": 0.2441, "step": 5000 }, { "epoch": 0.28, "learning_rate": 1.8894557823129253e-05, "loss": 0.2384, "step": 5200 }, { "epoch": 0.29, "learning_rate": 1.885204081632653e-05, "loss": 0.2265, "step": 5400 }, { "epoch": 0.3, "learning_rate": 1.880952380952381e-05, "loss": 0.2226, "step": 5600 }, { "epoch": 0.31, "learning_rate": 1.876700680272109e-05, "loss": 0.2197, "step": 5800 }, { "epoch": 0.32, "learning_rate": 1.8724489795918368e-05, "loss": 0.2079, "step": 6000 }, { "epoch": 0.33, "learning_rate": 1.8681972789115648e-05, "loss": 0.204, "step": 6200 }, { "epoch": 0.34, "learning_rate": 1.863945578231293e-05, "loss": 0.1986, "step": 6400 }, { "epoch": 0.35, "learning_rate": 1.8596938775510206e-05, "loss": 0.1965, "step": 6600 }, { "epoch": 0.36, "learning_rate": 1.8554421768707486e-05, "loss": 0.1874, "step": 6800 }, { "epoch": 0.37, "learning_rate": 1.8511904761904763e-05, "loss": 0.1889, "step": 7000 }, { "epoch": 0.38, "learning_rate": 1.8469387755102043e-05, "loss": 0.1841, "step": 7200 }, { "epoch": 0.39, "learning_rate": 1.842687074829932e-05, "loss": 0.1808, "step": 7400 }, { "epoch": 0.4, "learning_rate": 1.83843537414966e-05, "loss": 0.1778, "step": 7600 }, { "epoch": 0.41, "learning_rate": 1.8341836734693877e-05, "loss": 0.1715, "step": 7800 }, { "epoch": 0.43, "learning_rate": 1.8299319727891158e-05, "loss": 0.1717, "step": 8000 }, { "epoch": 0.43, "eval_loss": 0.1131439059972763, "eval_runtime": 1642.8688, "eval_samples_per_second": 81.446, "eval_steps_per_second": 20.362, "step": 8000 }, { "epoch": 0.44, "learning_rate": 1.8256802721088435e-05, "loss": 0.1673, "step": 8200 }, { "epoch": 0.45, "learning_rate": 1.8214285714285715e-05, "loss": 0.1633, "step": 8400 }, { "epoch": 0.46, "learning_rate": 1.8171768707482995e-05, "loss": 0.1608, "step": 8600 }, { "epoch": 0.47, "learning_rate": 1.8129251700680272e-05, "loss": 0.1623, "step": 8800 }, { "epoch": 0.48, "learning_rate": 1.8086734693877553e-05, "loss": 0.1583, "step": 9000 }, { "epoch": 0.49, "learning_rate": 1.8044217687074833e-05, "loss": 0.1548, "step": 9200 }, { "epoch": 0.5, "learning_rate": 1.800170068027211e-05, "loss": 0.1553, "step": 9400 }, { "epoch": 0.51, "learning_rate": 1.795918367346939e-05, "loss": 0.1491, "step": 9600 }, { "epoch": 0.52, "learning_rate": 1.7916666666666667e-05, "loss": 0.1529, "step": 9800 }, { "epoch": 0.53, "learning_rate": 1.7874149659863948e-05, "loss": 0.1489, "step": 10000 }, { "epoch": 0.54, "learning_rate": 1.7831632653061225e-05, "loss": 0.1454, "step": 10200 }, { "epoch": 0.55, "learning_rate": 1.7789115646258505e-05, "loss": 0.1458, "step": 10400 }, { "epoch": 0.56, "learning_rate": 1.7746598639455782e-05, "loss": 0.1401, "step": 10600 }, { "epoch": 0.57, "learning_rate": 1.7704081632653062e-05, "loss": 0.1407, "step": 10800 }, { "epoch": 0.58, "learning_rate": 1.7661564625850343e-05, "loss": 0.1377, "step": 11000 }, { "epoch": 0.6, "learning_rate": 1.761904761904762e-05, "loss": 0.1378, "step": 11200 }, { "epoch": 0.61, "learning_rate": 1.75765306122449e-05, "loss": 0.1344, "step": 11400 }, { "epoch": 0.62, "learning_rate": 1.7534013605442177e-05, "loss": 0.1331, "step": 11600 }, { "epoch": 0.63, "learning_rate": 1.7491496598639457e-05, "loss": 0.1333, "step": 11800 }, { "epoch": 0.64, "learning_rate": 1.7448979591836738e-05, "loss": 0.1317, "step": 12000 }, { "epoch": 0.64, "eval_loss": 0.09435752034187317, "eval_runtime": 1637.6898, "eval_samples_per_second": 81.704, "eval_steps_per_second": 20.426, "step": 12000 }, { "epoch": 0.65, "learning_rate": 1.7406462585034015e-05, "loss": 0.1309, "step": 12200 }, { "epoch": 0.66, "learning_rate": 1.7363945578231295e-05, "loss": 0.1308, "step": 12400 }, { "epoch": 0.67, "learning_rate": 1.7321428571428572e-05, "loss": 0.1303, "step": 12600 }, { "epoch": 0.68, "learning_rate": 1.7278911564625852e-05, "loss": 0.1258, "step": 12800 }, { "epoch": 0.69, "learning_rate": 1.723639455782313e-05, "loss": 0.1261, "step": 13000 }, { "epoch": 0.7, "learning_rate": 1.719387755102041e-05, "loss": 0.1261, "step": 13200 }, { "epoch": 0.71, "learning_rate": 1.7151360544217686e-05, "loss": 0.1236, "step": 13400 }, { "epoch": 0.72, "learning_rate": 1.7108843537414967e-05, "loss": 0.1219, "step": 13600 }, { "epoch": 0.73, "learning_rate": 1.7066326530612247e-05, "loss": 0.1223, "step": 13800 }, { "epoch": 0.74, "learning_rate": 1.7023809523809524e-05, "loss": 0.1219, "step": 14000 }, { "epoch": 0.75, "learning_rate": 1.6981292517006804e-05, "loss": 0.1188, "step": 14200 }, { "epoch": 0.77, "learning_rate": 1.6938775510204085e-05, "loss": 0.119, "step": 14400 }, { "epoch": 0.78, "learning_rate": 1.6896258503401362e-05, "loss": 0.118, "step": 14600 }, { "epoch": 0.79, "learning_rate": 1.6853741496598642e-05, "loss": 0.118, "step": 14800 }, { "epoch": 0.8, "learning_rate": 1.681122448979592e-05, "loss": 0.1156, "step": 15000 } ], "max_steps": 94080, "num_train_epochs": 5, "total_flos": 5.075997032448e+17, "trial_name": null, "trial_params": null }