{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 78750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.968253968253969e-05, "loss": 0.8671, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.936507936507937e-05, "loss": 0.2133, "step": 1000 }, { "epoch": 0.06, "learning_rate": 4.904761904761905e-05, "loss": 0.1414, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.873015873015873e-05, "loss": 0.1091, "step": 2000 }, { "epoch": 0.1, "learning_rate": 4.841269841269841e-05, "loss": 0.0924, "step": 2500 }, { "epoch": 0.11, "learning_rate": 4.80952380952381e-05, "loss": 0.0813, "step": 3000 }, { "epoch": 0.13, "learning_rate": 4.7777777777777784e-05, "loss": 0.0782, "step": 3500 }, { "epoch": 0.15, "learning_rate": 4.746031746031746e-05, "loss": 0.0707, "step": 4000 }, { "epoch": 0.17, "learning_rate": 4.714285714285714e-05, "loss": 0.0672, "step": 4500 }, { "epoch": 0.19, "learning_rate": 4.682539682539683e-05, "loss": 0.0624, "step": 5000 }, { "epoch": 0.21, "learning_rate": 4.6507936507936515e-05, "loss": 0.0595, "step": 5500 }, { "epoch": 0.23, "learning_rate": 4.6190476190476194e-05, "loss": 0.0584, "step": 6000 }, { "epoch": 0.25, "learning_rate": 4.587301587301587e-05, "loss": 0.0518, "step": 6500 }, { "epoch": 0.27, "learning_rate": 4.555555555555556e-05, "loss": 0.05, "step": 7000 }, { "epoch": 0.29, "learning_rate": 4.523809523809524e-05, "loss": 0.0498, "step": 7500 }, { "epoch": 0.3, "learning_rate": 4.4920634920634924e-05, "loss": 0.0451, "step": 8000 }, { "epoch": 0.32, "learning_rate": 4.460317460317461e-05, "loss": 0.0513, "step": 8500 }, { "epoch": 0.34, "learning_rate": 4.428571428571428e-05, "loss": 0.0516, "step": 9000 }, { "epoch": 0.36, "learning_rate": 4.396825396825397e-05, "loss": 0.0464, "step": 9500 }, { "epoch": 0.38, "learning_rate": 4.3650793650793655e-05, "loss": 0.0464, "step": 10000 }, { "epoch": 0.4, "learning_rate": 4.3333333333333334e-05, "loss": 0.0424, "step": 10500 }, { "epoch": 0.42, "learning_rate": 4.301587301587302e-05, "loss": 0.044, "step": 11000 }, { "epoch": 0.44, "learning_rate": 4.26984126984127e-05, "loss": 0.0441, "step": 11500 }, { "epoch": 0.46, "learning_rate": 4.2380952380952385e-05, "loss": 0.0382, "step": 12000 }, { "epoch": 0.48, "learning_rate": 4.2063492063492065e-05, "loss": 0.0428, "step": 12500 }, { "epoch": 0.5, "learning_rate": 4.174603174603175e-05, "loss": 0.038, "step": 13000 }, { "epoch": 0.51, "learning_rate": 4.1428571428571437e-05, "loss": 0.0416, "step": 13500 }, { "epoch": 0.53, "learning_rate": 4.111111111111111e-05, "loss": 0.0385, "step": 14000 }, { "epoch": 0.55, "learning_rate": 4.0793650793650795e-05, "loss": 0.0387, "step": 14500 }, { "epoch": 0.57, "learning_rate": 4.047619047619048e-05, "loss": 0.0392, "step": 15000 }, { "epoch": 0.59, "learning_rate": 4.015873015873016e-05, "loss": 0.0365, "step": 15500 }, { "epoch": 0.61, "learning_rate": 3.984126984126984e-05, "loss": 0.0409, "step": 16000 }, { "epoch": 0.63, "learning_rate": 3.9523809523809526e-05, "loss": 0.0385, "step": 16500 }, { "epoch": 0.65, "learning_rate": 3.9206349206349205e-05, "loss": 0.0361, "step": 17000 }, { "epoch": 0.67, "learning_rate": 3.888888888888889e-05, "loss": 0.0309, "step": 17500 }, { "epoch": 0.69, "learning_rate": 3.857142857142858e-05, "loss": 0.0348, "step": 18000 }, { "epoch": 0.7, "learning_rate": 3.8253968253968256e-05, "loss": 0.0321, "step": 18500 }, { "epoch": 0.72, "learning_rate": 3.7936507936507935e-05, "loss": 0.0331, "step": 19000 }, { "epoch": 0.74, "learning_rate": 3.761904761904762e-05, "loss": 0.0332, "step": 19500 }, { "epoch": 0.76, "learning_rate": 3.730158730158731e-05, "loss": 0.0304, "step": 20000 }, { "epoch": 0.78, "learning_rate": 3.6984126984126986e-05, "loss": 0.0348, "step": 20500 }, { "epoch": 0.8, "learning_rate": 3.6666666666666666e-05, "loss": 0.0328, "step": 21000 }, { "epoch": 0.82, "learning_rate": 3.634920634920635e-05, "loss": 0.0297, "step": 21500 }, { "epoch": 0.84, "learning_rate": 3.603174603174603e-05, "loss": 0.0316, "step": 22000 }, { "epoch": 0.86, "learning_rate": 3.571428571428572e-05, "loss": 0.0294, "step": 22500 }, { "epoch": 0.88, "learning_rate": 3.53968253968254e-05, "loss": 0.0311, "step": 23000 }, { "epoch": 0.9, "learning_rate": 3.5079365079365075e-05, "loss": 0.0281, "step": 23500 }, { "epoch": 0.91, "learning_rate": 3.476190476190476e-05, "loss": 0.0286, "step": 24000 }, { "epoch": 0.93, "learning_rate": 3.444444444444445e-05, "loss": 0.0316, "step": 24500 }, { "epoch": 0.95, "learning_rate": 3.412698412698413e-05, "loss": 0.0308, "step": 25000 }, { "epoch": 0.97, "learning_rate": 3.380952380952381e-05, "loss": 0.0263, "step": 25500 }, { "epoch": 0.99, "learning_rate": 3.349206349206349e-05, "loss": 0.0329, "step": 26000 }, { "epoch": 1.0, "eval_accuracy": 0.9930055555555556, "eval_f1-score": 0.9929596586748287, "eval_loss": 0.024911481887102127, "eval_runtime": 2756.559, "eval_samples_per_second": 261.195, "eval_steps_per_second": 2.041, "step": 26250 }, { "epoch": 1.01, "learning_rate": 3.317460317460318e-05, "loss": 0.0262, "step": 26500 }, { "epoch": 1.03, "learning_rate": 3.285714285714286e-05, "loss": 0.0259, "step": 27000 }, { "epoch": 1.05, "learning_rate": 3.253968253968254e-05, "loss": 0.0283, "step": 27500 }, { "epoch": 1.07, "learning_rate": 3.222222222222223e-05, "loss": 0.0264, "step": 28000 }, { "epoch": 1.09, "learning_rate": 3.19047619047619e-05, "loss": 0.0243, "step": 28500 }, { "epoch": 1.1, "learning_rate": 3.158730158730159e-05, "loss": 0.0275, "step": 29000 }, { "epoch": 1.12, "learning_rate": 3.1269841269841274e-05, "loss": 0.0284, "step": 29500 }, { "epoch": 1.14, "learning_rate": 3.095238095238095e-05, "loss": 0.0249, "step": 30000 }, { "epoch": 1.16, "learning_rate": 3.063492063492064e-05, "loss": 0.0267, "step": 30500 }, { "epoch": 1.18, "learning_rate": 3.0317460317460318e-05, "loss": 0.0283, "step": 31000 }, { "epoch": 1.2, "learning_rate": 3e-05, "loss": 0.0223, "step": 31500 }, { "epoch": 1.22, "learning_rate": 2.9682539682539683e-05, "loss": 0.0267, "step": 32000 }, { "epoch": 1.24, "learning_rate": 2.9365079365079366e-05, "loss": 0.0211, "step": 32500 }, { "epoch": 1.26, "learning_rate": 2.9047619047619052e-05, "loss": 0.0226, "step": 33000 }, { "epoch": 1.28, "learning_rate": 2.8730158730158728e-05, "loss": 0.0221, "step": 33500 }, { "epoch": 1.3, "learning_rate": 2.8412698412698414e-05, "loss": 0.0227, "step": 34000 }, { "epoch": 1.31, "learning_rate": 2.8095238095238096e-05, "loss": 0.0239, "step": 34500 }, { "epoch": 1.33, "learning_rate": 2.777777777777778e-05, "loss": 0.0254, "step": 35000 }, { "epoch": 1.35, "learning_rate": 2.7460317460317465e-05, "loss": 0.0236, "step": 35500 }, { "epoch": 1.37, "learning_rate": 2.714285714285714e-05, "loss": 0.0226, "step": 36000 }, { "epoch": 1.39, "learning_rate": 2.6825396825396827e-05, "loss": 0.021, "step": 36500 }, { "epoch": 1.41, "learning_rate": 2.650793650793651e-05, "loss": 0.0259, "step": 37000 }, { "epoch": 1.43, "learning_rate": 2.6190476190476192e-05, "loss": 0.0241, "step": 37500 }, { "epoch": 1.45, "learning_rate": 2.5873015873015878e-05, "loss": 0.0226, "step": 38000 }, { "epoch": 1.47, "learning_rate": 2.5555555555555554e-05, "loss": 0.023, "step": 38500 }, { "epoch": 1.49, "learning_rate": 2.523809523809524e-05, "loss": 0.0236, "step": 39000 }, { "epoch": 1.5, "learning_rate": 2.4920634920634923e-05, "loss": 0.0216, "step": 39500 }, { "epoch": 1.52, "learning_rate": 2.4603174603174602e-05, "loss": 0.0221, "step": 40000 }, { "epoch": 1.54, "learning_rate": 2.4285714285714288e-05, "loss": 0.0219, "step": 40500 }, { "epoch": 1.56, "learning_rate": 2.396825396825397e-05, "loss": 0.022, "step": 41000 }, { "epoch": 1.58, "learning_rate": 2.365079365079365e-05, "loss": 0.024, "step": 41500 }, { "epoch": 1.6, "learning_rate": 2.3333333333333336e-05, "loss": 0.021, "step": 42000 }, { "epoch": 1.62, "learning_rate": 2.3015873015873015e-05, "loss": 0.0219, "step": 42500 }, { "epoch": 1.64, "learning_rate": 2.2698412698412698e-05, "loss": 0.0199, "step": 43000 }, { "epoch": 1.66, "learning_rate": 2.2380952380952384e-05, "loss": 0.0208, "step": 43500 }, { "epoch": 1.68, "learning_rate": 2.2063492063492063e-05, "loss": 0.0198, "step": 44000 }, { "epoch": 1.7, "learning_rate": 2.174603174603175e-05, "loss": 0.0217, "step": 44500 }, { "epoch": 1.71, "learning_rate": 2.1428571428571428e-05, "loss": 0.0189, "step": 45000 }, { "epoch": 1.73, "learning_rate": 2.111111111111111e-05, "loss": 0.0199, "step": 45500 }, { "epoch": 1.75, "learning_rate": 2.0793650793650797e-05, "loss": 0.0208, "step": 46000 }, { "epoch": 1.77, "learning_rate": 2.0476190476190476e-05, "loss": 0.0196, "step": 46500 }, { "epoch": 1.79, "learning_rate": 2.015873015873016e-05, "loss": 0.0197, "step": 47000 }, { "epoch": 1.81, "learning_rate": 1.984126984126984e-05, "loss": 0.0197, "step": 47500 }, { "epoch": 1.83, "learning_rate": 1.9523809523809524e-05, "loss": 0.0184, "step": 48000 }, { "epoch": 1.85, "learning_rate": 1.920634920634921e-05, "loss": 0.0166, "step": 48500 }, { "epoch": 1.87, "learning_rate": 1.888888888888889e-05, "loss": 0.0217, "step": 49000 }, { "epoch": 1.89, "learning_rate": 1.8571428571428572e-05, "loss": 0.0189, "step": 49500 }, { "epoch": 1.9, "learning_rate": 1.8253968253968254e-05, "loss": 0.018, "step": 50000 }, { "epoch": 1.92, "learning_rate": 1.7936507936507937e-05, "loss": 0.0182, "step": 50500 }, { "epoch": 1.94, "learning_rate": 1.761904761904762e-05, "loss": 0.0176, "step": 51000 }, { "epoch": 1.96, "learning_rate": 1.7301587301587302e-05, "loss": 0.0163, "step": 51500 }, { "epoch": 1.98, "learning_rate": 1.6984126984126985e-05, "loss": 0.0174, "step": 52000 }, { "epoch": 2.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.0168, "step": 52500 }, { "epoch": 2.0, "eval_accuracy": 0.9950902777777778, "eval_f1-score": 0.99507649644856, "eval_loss": 0.01802189275622368, "eval_runtime": 2757.205, "eval_samples_per_second": 261.134, "eval_steps_per_second": 2.04, "step": 52500 }, { "epoch": 2.02, "learning_rate": 1.634920634920635e-05, "loss": 0.0165, "step": 53000 }, { "epoch": 2.04, "learning_rate": 1.6031746031746033e-05, "loss": 0.0165, "step": 53500 }, { "epoch": 2.06, "learning_rate": 1.5714285714285715e-05, "loss": 0.0168, "step": 54000 }, { "epoch": 2.08, "learning_rate": 1.5396825396825398e-05, "loss": 0.0169, "step": 54500 }, { "epoch": 2.1, "learning_rate": 1.5079365079365079e-05, "loss": 0.0149, "step": 55000 }, { "epoch": 2.11, "learning_rate": 1.4761904761904763e-05, "loss": 0.0157, "step": 55500 }, { "epoch": 2.13, "learning_rate": 1.4444444444444444e-05, "loss": 0.0146, "step": 56000 }, { "epoch": 2.15, "learning_rate": 1.4126984126984127e-05, "loss": 0.0139, "step": 56500 }, { "epoch": 2.17, "learning_rate": 1.3809523809523811e-05, "loss": 0.0158, "step": 57000 }, { "epoch": 2.19, "learning_rate": 1.3492063492063492e-05, "loss": 0.0148, "step": 57500 }, { "epoch": 2.21, "learning_rate": 1.3174603174603176e-05, "loss": 0.0153, "step": 58000 }, { "epoch": 2.23, "learning_rate": 1.2857142857142857e-05, "loss": 0.0154, "step": 58500 }, { "epoch": 2.25, "learning_rate": 1.253968253968254e-05, "loss": 0.016, "step": 59000 }, { "epoch": 2.27, "learning_rate": 1.2222222222222222e-05, "loss": 0.0146, "step": 59500 }, { "epoch": 2.29, "learning_rate": 1.1904761904761905e-05, "loss": 0.0162, "step": 60000 }, { "epoch": 2.3, "learning_rate": 1.1587301587301588e-05, "loss": 0.0156, "step": 60500 }, { "epoch": 2.32, "learning_rate": 1.126984126984127e-05, "loss": 0.0151, "step": 61000 }, { "epoch": 2.34, "learning_rate": 1.0952380952380953e-05, "loss": 0.0138, "step": 61500 }, { "epoch": 2.36, "learning_rate": 1.0634920634920636e-05, "loss": 0.0144, "step": 62000 }, { "epoch": 2.38, "learning_rate": 1.0317460317460318e-05, "loss": 0.0151, "step": 62500 }, { "epoch": 2.4, "learning_rate": 1e-05, "loss": 0.0163, "step": 63000 }, { "epoch": 2.42, "learning_rate": 9.682539682539683e-06, "loss": 0.0131, "step": 63500 }, { "epoch": 2.44, "learning_rate": 9.365079365079366e-06, "loss": 0.015, "step": 64000 }, { "epoch": 2.46, "learning_rate": 9.047619047619047e-06, "loss": 0.0142, "step": 64500 }, { "epoch": 2.48, "learning_rate": 8.73015873015873e-06, "loss": 0.0145, "step": 65000 }, { "epoch": 2.5, "learning_rate": 8.412698412698414e-06, "loss": 0.0147, "step": 65500 }, { "epoch": 2.51, "learning_rate": 8.095238095238097e-06, "loss": 0.0163, "step": 66000 }, { "epoch": 2.53, "learning_rate": 7.777777777777777e-06, "loss": 0.0138, "step": 66500 }, { "epoch": 2.55, "learning_rate": 7.460317460317461e-06, "loss": 0.0145, "step": 67000 }, { "epoch": 2.57, "learning_rate": 7.142857142857143e-06, "loss": 0.0135, "step": 67500 }, { "epoch": 2.59, "learning_rate": 6.825396825396825e-06, "loss": 0.0141, "step": 68000 }, { "epoch": 2.61, "learning_rate": 6.507936507936509e-06, "loss": 0.013, "step": 68500 }, { "epoch": 2.63, "learning_rate": 6.190476190476191e-06, "loss": 0.0125, "step": 69000 }, { "epoch": 2.65, "learning_rate": 5.873015873015873e-06, "loss": 0.0116, "step": 69500 }, { "epoch": 2.67, "learning_rate": 5.555555555555556e-06, "loss": 0.0165, "step": 70000 }, { "epoch": 2.69, "learning_rate": 5.2380952380952384e-06, "loss": 0.0135, "step": 70500 }, { "epoch": 2.7, "learning_rate": 4.920634920634921e-06, "loss": 0.012, "step": 71000 }, { "epoch": 2.72, "learning_rate": 4.603174603174604e-06, "loss": 0.0115, "step": 71500 }, { "epoch": 2.74, "learning_rate": 4.285714285714286e-06, "loss": 0.0137, "step": 72000 }, { "epoch": 2.76, "learning_rate": 3.968253968253968e-06, "loss": 0.0141, "step": 72500 }, { "epoch": 2.78, "learning_rate": 3.650793650793651e-06, "loss": 0.0112, "step": 73000 }, { "epoch": 2.8, "learning_rate": 3.3333333333333333e-06, "loss": 0.014, "step": 73500 }, { "epoch": 2.82, "learning_rate": 3.015873015873016e-06, "loss": 0.0123, "step": 74000 }, { "epoch": 2.84, "learning_rate": 2.6984126984126986e-06, "loss": 0.0151, "step": 74500 }, { "epoch": 2.86, "learning_rate": 2.3809523809523808e-06, "loss": 0.0141, "step": 75000 }, { "epoch": 2.88, "learning_rate": 2.0634920634920634e-06, "loss": 0.0134, "step": 75500 }, { "epoch": 2.9, "learning_rate": 1.7460317460317462e-06, "loss": 0.0124, "step": 76000 }, { "epoch": 2.91, "learning_rate": 1.4285714285714286e-06, "loss": 0.011, "step": 76500 }, { "epoch": 2.93, "learning_rate": 1.1111111111111112e-06, "loss": 0.0113, "step": 77000 }, { "epoch": 2.95, "learning_rate": 7.936507936507937e-07, "loss": 0.0123, "step": 77500 }, { "epoch": 2.97, "learning_rate": 4.761904761904763e-07, "loss": 0.0119, "step": 78000 }, { "epoch": 2.99, "learning_rate": 1.5873015873015874e-07, "loss": 0.011, "step": 78500 }, { "epoch": 3.0, "eval_accuracy": 0.9963097222222222, "eval_f1-score": 0.9963028995135582, "eval_loss": 0.013718551024794579, "eval_runtime": 2760.1832, "eval_samples_per_second": 260.852, "eval_steps_per_second": 2.038, "step": 78750 } ], "max_steps": 78750, "num_train_epochs": 3, "total_flos": 6.6789762859008e+17, "trial_name": null, "trial_params": null }